19714 lines
970 KiB
Text
19714 lines
970 KiB
Text
/home/tanu/git/LSHTM_analysis/scripts/ml/ml_data_orig.py:550: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
mask_check.sort_values(by = ['ligand_distance'], ascending = True, inplace = True)
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/xgboost/compat.py:36: FutureWarning: pandas.Int64Index is deprecated and will be removed from pandas in a future version. Use pandas.Index with the appropriate dtype instead.
|
|
from pandas import MultiIndex, Int64Index
|
|
1.22.4
|
|
1.4.1
|
|
|
|
aaindex_df contains non-numerical data
|
|
|
|
Total no. of non-numerial columns: 2
|
|
|
|
Selecting numerical data only
|
|
|
|
PASS: successfully selected numerical columns only for aaindex_df
|
|
|
|
Now checking for NA in the remaining aaindex_cols
|
|
|
|
Counting aaindex_df cols with NA
|
|
ncols with NA: 4 columns
|
|
Dropping these...
|
|
Original ncols: 127
|
|
|
|
Revised df ncols: 123
|
|
|
|
Checking NA in revised df...
|
|
|
|
PASS: cols with NA successfully dropped from aaindex_df
|
|
Proceeding with combining aa_df with other features_df
|
|
|
|
PASS: ncols match
|
|
Expected ncols: 123
|
|
Got: 123
|
|
|
|
Total no. of columns in clean aa_df: 123
|
|
|
|
Proceeding to merge, expected nrows in merged_df: 858
|
|
|
|
PASS: my_features_df and aa_df successfully combined
|
|
nrows: 858
|
|
ncols: 269
|
|
count of NULL values before imputation
|
|
|
|
or_mychisq 244
|
|
log10_or_mychisq 244
|
|
dtype: int64
|
|
count of NULL values AFTER imputation
|
|
|
|
mutationinformation 0
|
|
or_rawI 0
|
|
logorI 0
|
|
dtype: int64
|
|
|
|
PASS: OR values imputed, data ready for ML
|
|
|
|
Total no. of features for aaindex: 123
|
|
|
|
No. of numerical features: 168
|
|
No. of categorical features: 7
|
|
|
|
index: 0
|
|
ind: 1
|
|
|
|
Mask count check: True
|
|
|
|
index: 1
|
|
ind: 2
|
|
|
|
Mask count check: False
|
|
Original Data
|
|
Counter({0: 353, 1: 95}) Data dim: (448, 175)
|
|
|
|
-------------------------------------------------------------
|
|
Successfully split data: ORIGINAL training
|
|
actual values: training set
|
|
imputed values: blind test set
|
|
Train data size: (448, 175)
|
|
Test data size: (410, 175)
|
|
y_train numbers: Counter({0: 353, 1: 95})
|
|
y_train ratio: 3.7157894736842105
|
|
|
|
y_test_numbers: Counter({0: 385, 1: 25})
|
|
y_test ratio: 15.4
|
|
-------------------------------------------------------------
|
|
Simple Random OverSampling
|
|
Counter({1: 353, 0: 353})
|
|
(706, 175)
|
|
Simple Random UnderSampling
|
|
Counter({0: 95, 1: 95})
|
|
(190, 175)
|
|
Simple Combined Over and UnderSampling
|
|
Counter({0: 353, 1: 353})
|
|
(706, 175)
|
|
SMOTE_NC OverSampling
|
|
Counter({1: 353, 0: 353})
|
|
(706, 175)
|
|
|
|
#####################################################################
|
|
|
|
Running ML analysis: ORIGINAL
|
|
Gene name: embB
|
|
Drug name: ethambutol
|
|
|
|
Output directory: /home/tanu/git/Data/ethambutol/output/ml/tts_orig/
|
|
|
|
Sanity checks:
|
|
Total input features: 175
|
|
|
|
Training data size: (448, 175)
|
|
Test data size: (410, 175)
|
|
|
|
Target feature numbers (training data): Counter({0: 353, 1: 95})
|
|
Target features ratio (training data: 3.7157894736842105
|
|
|
|
Target feature numbers (test data): Counter({0: 385, 1: 25})
|
|
Target features ratio (test data): 15.4
|
|
|
|
#####################################################################
|
|
|
|
|
|
================================================================
|
|
|
|
Strucutral features (n): 36
|
|
These are:
|
|
Common stablity features: ['ligand_distance', 'ligand_affinity_change', 'duet_stability_change', 'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'mcsm_ppi2_affinity', 'interface_dist']
|
|
FoldX columns: ['electro_rr', 'electro_mm', 'electro_sm', 'electro_ss', 'disulfide_rr', 'disulfide_mm', 'disulfide_sm', 'disulfide_ss', 'hbonds_rr', 'hbonds_mm', 'hbonds_sm', 'hbonds_ss', 'partcov_rr', 'partcov_mm', 'partcov_sm', 'partcov_ss', 'vdwclashes_rr', 'vdwclashes_mm', 'vdwclashes_sm', 'vdwclashes_ss', 'volumetric_rr', 'volumetric_mm', 'volumetric_ss']
|
|
Other struc columns: ['rsa', 'kd_values', 'rd_values']
|
|
================================================================
|
|
|
|
AAindex features (n): 123
|
|
These are:
|
|
['ALTS910101', 'AZAE970101', 'AZAE970102', 'BASU010101', 'BENS940101', 'BENS940102', 'BENS940103', 'BENS940104', 'BETM990101', 'BLAJ010101', 'BONM030101', 'BONM030102', 'BONM030103', 'BONM030104', 'BONM030105', 'BONM030106', 'BRYS930101', 'CROG050101', 'CSEM940101', 'DAYM780301', 'DAYM780302', 'DOSZ010101', 'DOSZ010102', 'DOSZ010103', 'DOSZ010104', 'FEND850101', 'FITW660101', 'GEOD900101', 'GIAG010101', 'GONG920101', 'GRAR740104', 'HENS920101', 'HENS920102', 'HENS920103', 'HENS920104', 'JOHM930101', 'JOND920103', 'JOND940101', 'KANM000101', 'KAPO950101', 'KESO980101', 'KESO980102', 'KOLA920101', 'KOLA930101', 'KOSJ950100_RSA_SST', 'KOSJ950100_SST', 'KOSJ950110_RSA', 'KOSJ950115', 'LEVJ860101', 'LINK010101', 'LIWA970101', 'LUTR910101', 'LUTR910102', 'LUTR910103', 'LUTR910104', 'LUTR910105', 'LUTR910106', 'LUTR910107', 'LUTR910108', 'LUTR910109', 'MCLA710101', 'MCLA720101', 'MEHP950102', 'MICC010101', 'MIRL960101', 'MIYS850102', 'MIYS850103', 'MIYS930101', 'MIYS960101', 'MIYS960102', 'MIYS960103', 'MIYS990106', 'MIYS990107', 'MIYT790101', 'MOHR870101', 'MOOG990101', 'MUET010101', 'MUET020101', 'MUET020102', 'NAOD960101', 'NGPC000101', 'NIEK910101', 'NIEK910102', 'OGAK980101', 'OVEJ920100_RSA', 'OVEJ920101', 'OVEJ920102', 'OVEJ920103', 'PRLA000101', 'PRLA000102', 'QUIB020101', 'QU_C930101', 'QU_C930102', 'QU_C930103', 'RIER950101', 'RISJ880101', 'RUSR970101', 'RUSR970102', 'RUSR970103', 'SIMK990101', 'SIMK990102', 'SIMK990103', 'SIMK990104', 'SIMK990105', 'SKOJ000101', 'SKOJ000102', 'SKOJ970101', 'TANS760101', 'TANS760102', 'THOP960101', 'TOBD000101', 'TOBD000102', 'TUDE900101', 'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101', 'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106']
|
|
================================================================
|
|
|
|
Evolutionary features (n): 3
|
|
These are:
|
|
['consurf_score', 'snap2_score', 'provean_score']
|
|
================================================================
|
|
|
|
Genomic features (n): 6
|
|
These are:
|
|
['maf', 'logorI']
|
|
['lineage_proportion', 'dist_lineage_proportion', 'lineage_count_all', 'lineage_count_unique']
|
|
================================================================
|
|
|
|
Categorical features (n): 7
|
|
These are:
|
|
['ss_class', 'aa_prop_change', 'electrostatics_change', 'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site']
|
|
================================================================
|
|
|
|
|
|
Pass: No. of features match
|
|
|
|
#####################################################################
|
|
|
|
|
|
Model_name: Logistic Regression
|
|
Model func: LogisticRegression(random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegression(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.03788757 0.03593278 0.03712201 0.03725696 0.03593421 0.03633451
|
|
0.03592753 0.03521562 0.03657556 0.03584194]
|
|
|
|
mean value: 0.03640286922454834
|
|
|
|
key: score_time
|
|
value: [0.01228571 0.01192355 0.0120225 0.01505685 0.01506758 0.01510024
|
|
0.01528955 0.01501226 0.01512766 0.01487589]
|
|
|
|
mean value: 0.014176177978515624
|
|
|
|
key: test_mcc
|
|
value: [0.40089186 0.40347329 0.53452248 0.80295507 0.80295507 0.63936201
|
|
0.70710678 0.53033009 0.78360391 0.72063492]
|
|
|
|
mean value: 0.6325835486952146
|
|
|
|
key: train_mcc
|
|
value: [0.83113606 0.81550766 0.81463907 0.80735444 0.82363189 0.84816535
|
|
0.82523833 0.84858001 0.82480205 0.83278508]
|
|
|
|
mean value: 0.827183994794372
|
|
|
|
key: test_accuracy
|
|
value: [0.8 0.82222222 0.84444444 0.93333333 0.93333333 0.88888889
|
|
0.91111111 0.86666667 0.93181818 0.90909091]
|
|
|
|
mean value: 0.884090909090909
|
|
|
|
key: train_accuracy
|
|
value: [0.94540943 0.94044665 0.94044665 0.93796526 0.94292804 0.95037221
|
|
0.94292804 0.95037221 0.94306931 0.94554455]
|
|
|
|
mean value: 0.9439482347738496
|
|
|
|
key: test_fscore
|
|
value: [0.52631579 0.33333333 0.63157895 0.82352941 0.82352941 0.70588235
|
|
0.71428571 0.57142857 0.8 0.77777778]
|
|
|
|
mean value: 0.670766131013809
|
|
|
|
key: train_fscore
|
|
value: [0.8625 0.85 0.84615385 0.8427673 0.85714286 0.87654321
|
|
0.85889571 0.87804878 0.85714286 0.86419753]
|
|
|
|
mean value: 0.8593392082787062
|
|
|
|
key: test_precision
|
|
value: [0.55555556 1. 0.66666667 1. 1. 0.75
|
|
1. 0.8 1. 0.77777778]
|
|
|
|
mean value: 0.855
|
|
|
|
key: train_precision
|
|
value: [0.92 0.90666667 0.92957746 0.90540541 0.90789474 0.93421053
|
|
0.90909091 0.92307692 0.92 0.92105263]
|
|
|
|
mean value: 0.9176975263765479
|
|
|
|
key: test_recall
|
|
value: [0.5 0.2 0.6 0.7 0.7 0.66666667
|
|
0.55555556 0.44444444 0.66666667 0.77777778]
|
|
|
|
mean value: 0.5811111111111111
|
|
|
|
key: train_recall
|
|
value: [0.81176471 0.8 0.77647059 0.78823529 0.81176471 0.8255814
|
|
0.81395349 0.8372093 0.80232558 0.81395349]
|
|
|
|
mean value: 0.80812585499316
|
|
|
|
key: test_roc_auc
|
|
value: [0.69285714 0.6 0.75714286 0.85 0.85 0.80555556
|
|
0.77777778 0.70833333 0.83333333 0.86031746]
|
|
|
|
mean value: 0.773531746031746
|
|
|
|
key: train_roc_auc
|
|
value: [0.89644839 0.88899371 0.88037366 0.88311136 0.89487606 0.90490426
|
|
0.89593573 0.90914093 0.89172883 0.89754278]
|
|
|
|
mean value: 0.8943055717836265
|
|
|
|
key: test_jcc
|
|
value: [0.35714286 0.2 0.46153846 0.7 0.7 0.54545455
|
|
0.55555556 0.4 0.66666667 0.63636364]
|
|
|
|
mean value: 0.5222721722721723
|
|
|
|
key: train_jcc
|
|
value: [0.75824176 0.73913043 0.73333333 0.72826087 0.75 0.78021978
|
|
0.75268817 0.7826087 0.75 0.76086957]
|
|
|
|
mean value: 0.7535352609055274
|
|
|
|
MCC on Blind test: 0.36
|
|
|
|
Accuracy on Blind test: 0.9
|
|
|
|
Model_name: Logistic RegressionCV
|
|
Model func: LogisticRegressionCV(random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegressionCV(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.95134854 0.79436994 0.90389776 0.90213203 0.86458445 0.95182395
|
|
0.8368113 0.95572114 0.86272001 0.86330676]
|
|
|
|
mean value: 0.888671588897705
|
|
|
|
key: score_time
|
|
value: [0.01662469 0.01521587 0.01726174 0.01508331 0.01561952 0.0150938
|
|
0.01508546 0.01503205 0.015378 0.01511192]
|
|
|
|
mean value: 0.015550637245178222
|
|
|
|
key: test_mcc
|
|
value: [0.67993436 0.66143783 0.61428571 0.93541435 0.80178373 0.63936201
|
|
0.85839508 0.62103443 0.70609879 0.74819006]
|
|
|
|
mean value: 0.7265936332503764
|
|
|
|
key: train_mcc
|
|
value: [1. 0.95527192 0.90160916 0.96291309 0.95527192 0.97794631
|
|
0.97043504 0.9632258 0.96325064 0.96325064]
|
|
|
|
mean value: 0.9613174511837531
|
|
|
|
key: test_accuracy
|
|
value: [0.86666667 0.88888889 0.86666667 0.97777778 0.93333333 0.88888889
|
|
0.95555556 0.88888889 0.90909091 0.90909091]
|
|
|
|
mean value: 0.9084848484848485
|
|
|
|
key: train_accuracy
|
|
value: [1. 0.98511166 0.96774194 0.98759305 0.98511166 0.99255583
|
|
0.99007444 0.98759305 0.98762376 0.98762376]
|
|
|
|
mean value: 0.9871029162469597
|
|
|
|
key: test_fscore
|
|
value: [0.75 0.66666667 0.7 0.94736842 0.84210526 0.70588235
|
|
0.875 0.66666667 0.71428571 0.8 ]
|
|
|
|
mean value: 0.7667975084770751
|
|
|
|
key: train_fscore
|
|
value: [1. 0.96470588 0.92121212 0.97076023 0.96470588 0.98265896
|
|
0.97674419 0.97109827 0.97109827 0.97109827]
|
|
|
|
mean value: 0.9694082063108077
|
|
|
|
key: test_precision
|
|
value: [0.64285714 1. 0.7 1. 0.88888889 0.75
|
|
1. 0.83333333 1. 0.72727273]
|
|
|
|
mean value: 0.8542352092352092
|
|
|
|
key: train_precision
|
|
value: [1. 0.96470588 0.95 0.96511628 0.96470588 0.97701149
|
|
0.97674419 0.96551724 0.96551724 0.96551724]
|
|
|
|
mean value: 0.9694835448212966
|
|
|
|
key: test_recall
|
|
value: [0.9 0.5 0.7 0.9 0.8 0.66666667
|
|
0.77777778 0.55555556 0.55555556 0.88888889]
|
|
|
|
mean value: 0.7244444444444444
|
|
|
|
key: train_recall
|
|
value: [1. 0.96470588 0.89411765 0.97647059 0.96470588 0.98837209
|
|
0.97674419 0.97674419 0.97674419 0.97674419]
|
|
|
|
mean value: 0.9695348837209302
|
|
|
|
key: test_roc_auc
|
|
value: [0.87857143 0.75 0.80714286 0.95 0.88571429 0.80555556
|
|
0.88888889 0.76388889 0.77777778 0.9015873 ]
|
|
|
|
mean value: 0.8409126984126984
|
|
|
|
key: train_roc_auc
|
|
value: [1. 0.97763596 0.94076952 0.98351831 0.97763596 0.99103147
|
|
0.98521752 0.98364023 0.98365511 0.98365511]
|
|
|
|
mean value: 0.9806759195304451
|
|
|
|
key: test_jcc
|
|
value: [0.6 0.5 0.53846154 0.9 0.72727273 0.54545455
|
|
0.77777778 0.5 0.55555556 0.66666667]
|
|
|
|
mean value: 0.6311188811188811
|
|
|
|
key: train_jcc
|
|
value: [1. 0.93181818 0.85393258 0.94318182 0.93181818 0.96590909
|
|
0.95454545 0.94382022 0.94382022 0.94382022]
|
|
|
|
mean value: 0.9412665985699693
|
|
|
|
MCC on Blind test: 0.28
|
|
|
|
Accuracy on Blind test: 0.85
|
|
|
|
Model_name: Gaussian NB
|
|
Model func: GaussianNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01367068 0.01125169 0.01007748 0.00966883 0.00968575 0.00963092
|
|
0.00967598 0.00991416 0.00964212 0.0095737 ]
|
|
|
|
mean value: 0.010279130935668946
|
|
|
|
key: score_time
|
|
value: [0.01239657 0.00932717 0.00899005 0.00876069 0.00878191 0.00884748
|
|
0.00880647 0.00885558 0.00884557 0.00887179]
|
|
|
|
mean value: 0.00924832820892334
|
|
|
|
key: test_mcc
|
|
value: [0.60276378 0.59030128 0.40291148 0.64465837 0.60795729 0.55708601
|
|
0.72222222 0.45760432 0.65956874 0.32203059]
|
|
|
|
mean value: 0.5567104087598187
|
|
|
|
key: train_mcc
|
|
value: [0.64307178 0.61919247 0.69462182 0.65645238 0.65031558 0.65324931
|
|
0.66939323 0.66854656 0.66479495 0.69697199]
|
|
|
|
mean value: 0.6616610072132635
|
|
|
|
key: test_accuracy
|
|
value: [0.84444444 0.86666667 0.77777778 0.86666667 0.82222222 0.8
|
|
0.91111111 0.84444444 0.86363636 0.75 ]
|
|
|
|
mean value: 0.8346969696969697
|
|
|
|
key: train_accuracy
|
|
value: [0.86104218 0.87096774 0.88585608 0.86352357 0.87344913 0.83622829
|
|
0.87593052 0.87344913 0.87376238 0.88613861]
|
|
|
|
mean value: 0.870034764022308
|
|
|
|
key: test_fscore
|
|
value: [0.69565217 0.66666667 0.54545455 0.72727273 0.69230769 0.64
|
|
0.77777778 0.53333333 0.72727273 0.47619048]
|
|
|
|
mean value: 0.648192812018899
|
|
|
|
key: train_fscore
|
|
value: [0.72 0.70114943 0.76041667 0.72906404 0.72727273 0.71551724
|
|
0.74226804 0.74111675 0.73846154 0.7628866 ]
|
|
|
|
mean value: 0.7338153028920759
|
|
|
|
key: test_precision
|
|
value: [0.61538462 0.75 0.5 0.66666667 0.5625 0.5
|
|
0.77777778 0.66666667 0.61538462 0.41666667]
|
|
|
|
mean value: 0.6071047008547008
|
|
|
|
key: train_precision
|
|
value: [0.62608696 0.68539326 0.68224299 0.62711864 0.66666667 0.56849315
|
|
0.66666667 0.65765766 0.66055046 0.68518519]
|
|
|
|
mean value: 0.6526061635247412
|
|
|
|
key: test_recall
|
|
value: [0.8 0.6 0.6 0.8 0.9 0.88888889
|
|
0.77777778 0.44444444 0.88888889 0.55555556]
|
|
|
|
mean value: 0.7255555555555555
|
|
|
|
key: train_recall
|
|
value: [0.84705882 0.71764706 0.85882353 0.87058824 0.8 0.96511628
|
|
0.8372093 0.84883721 0.8372093 0.86046512]
|
|
|
|
mean value: 0.8442954856361149
|
|
|
|
key: test_roc_auc
|
|
value: [0.82857143 0.77142857 0.71428571 0.84285714 0.85 0.83333333
|
|
0.86111111 0.69444444 0.87301587 0.67777778]
|
|
|
|
mean value: 0.7946825396825397
|
|
|
|
key: train_roc_auc
|
|
value: [0.85591935 0.81479837 0.87595265 0.86611173 0.84654088 0.88318905
|
|
0.86182232 0.8644817 0.86042855 0.87677344]
|
|
|
|
mean value: 0.8606018030922475
|
|
|
|
key: test_jcc
|
|
value: [0.53333333 0.5 0.375 0.57142857 0.52941176 0.47058824
|
|
0.63636364 0.36363636 0.57142857 0.3125 ]
|
|
|
|
mean value: 0.4863690476190476
|
|
|
|
key: train_jcc
|
|
value: [0.5625 0.53982301 0.61344538 0.57364341 0.57142857 0.55704698
|
|
0.59016393 0.58870968 0.58536585 0.61666667]
|
|
|
|
mean value: 0.5798793481318663
|
|
|
|
MCC on Blind test: 0.29
|
|
|
|
Accuracy on Blind test: 0.8
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01128411 0.00987768 0.00978041 0.00982451 0.00985265 0.00980425
|
|
0.01003218 0.00995755 0.01033115 0.00991154]
|
|
|
|
mean value: 0.010065603256225585
|
|
|
|
key: score_time
|
|
value: [0.00936818 0.0089047 0.00883746 0.00892639 0.00886655 0.00878286
|
|
0.00888276 0.00894094 0.00882745 0.00887847]
|
|
|
|
mean value: 0.008921575546264649
|
|
|
|
key: test_mcc
|
|
value: [0.48571429 0.41931393 0.10482848 0.49135381 0.40089186 0.03904344
|
|
0.23426064 0.16666667 0.39723602 0.17349856]
|
|
|
|
mean value: 0.29128077065715524
|
|
|
|
key: train_mcc
|
|
value: [0.45932002 0.4225046 0.48796447 0.39888858 0.40529872 0.44673035
|
|
0.46070614 0.41379661 0.42515227 0.43936752]
|
|
|
|
mean value: 0.4359729278396321
|
|
|
|
key: test_accuracy
|
|
value: [0.82222222 0.82222222 0.73333333 0.84444444 0.8 0.75555556
|
|
0.8 0.73333333 0.79545455 0.77272727]
|
|
|
|
mean value: 0.7879292929292929
|
|
|
|
key: train_accuracy
|
|
value: [0.83126551 0.82630273 0.84119107 0.81885856 0.82133995 0.82878412
|
|
0.83126551 0.81885856 0.82673267 0.82920792]
|
|
|
|
mean value: 0.8273806599022184
|
|
|
|
key: test_fscore
|
|
value: [0.6 0.5 0.25 0.53333333 0.52631579 0.15384615
|
|
0.30769231 0.33333333 0.52631579 0.28571429]
|
|
|
|
mean value: 0.4016550992866782
|
|
|
|
key: train_fscore
|
|
value: [0.55844156 0.51388889 0.57894737 0.49655172 0.5 0.54304636
|
|
0.55844156 0.51655629 0.51388889 0.53061224]
|
|
|
|
mean value: 0.531037488112446
|
|
|
|
key: test_precision
|
|
value: [0.6 0.66666667 0.33333333 0.8 0.55555556 0.25
|
|
0.5 0.33333333 0.5 0.4 ]
|
|
|
|
mean value: 0.4938888888888889
|
|
|
|
key: train_precision
|
|
value: [0.62318841 0.62711864 0.65671642 0.6 0.61016949 0.63076923
|
|
0.63235294 0.6 0.63793103 0.63934426]
|
|
|
|
mean value: 0.6257590428024311
|
|
|
|
key: test_recall
|
|
value: [0.6 0.4 0.2 0.4 0.5 0.11111111
|
|
0.22222222 0.33333333 0.55555556 0.22222222]
|
|
|
|
mean value: 0.35444444444444445
|
|
|
|
key: train_recall
|
|
value: [0.50588235 0.43529412 0.51764706 0.42352941 0.42352941 0.47674419
|
|
0.5 0.45348837 0.43023256 0.45348837]
|
|
|
|
mean value: 0.46198358413132695
|
|
|
|
key: test_roc_auc
|
|
value: [0.74285714 0.67142857 0.54285714 0.68571429 0.69285714 0.51388889
|
|
0.58333333 0.58333333 0.70634921 0.56825397]
|
|
|
|
mean value: 0.6290873015873015
|
|
|
|
key: train_roc_auc
|
|
value: [0.71206067 0.68305586 0.72266001 0.67402886 0.67560118 0.7005172
|
|
0.71056782 0.68573472 0.68209741 0.69215299]
|
|
|
|
mean value: 0.693847673660013
|
|
|
|
key: test_jcc
|
|
value: [0.42857143 0.33333333 0.14285714 0.36363636 0.35714286 0.08333333
|
|
0.18181818 0.2 0.35714286 0.16666667]
|
|
|
|
mean value: 0.26145021645021643
|
|
|
|
key: train_jcc
|
|
value: [0.38738739 0.34579439 0.40740741 0.33027523 0.33333333 0.37272727
|
|
0.38738739 0.34821429 0.34579439 0.36111111]
|
|
|
|
mean value: 0.3619432199472712
|
|
|
|
MCC on Blind test: 0.33
|
|
|
|
Accuracy on Blind test: 0.89
|
|
|
|
Model_name: K-Nearest Neighbors
|
|
Model func: KNeighborsClassifier()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', KNeighborsClassifier())])
|
|
|
|
key: fit_time
|
|
value: [0.00919151 0.01113057 0.00946307 0.01008654 0.01013613 0.01022291
|
|
0.01048517 0.01021123 0.00917387 0.01029396]
|
|
|
|
mean value: 0.010039496421813964
|
|
|
|
key: score_time
|
|
value: [0.07571197 0.01311994 0.01184392 0.01183653 0.01343513 0.01661658
|
|
0.01246881 0.01305532 0.01283407 0.01320052]
|
|
|
|
mean value: 0.01941227912902832
|
|
|
|
key: test_mcc
|
|
value: [-0.11527808 -0.0805823 0.1440976 -0.11527808 0.40347329 0.16174916
|
|
0.13074409 0. 0.53343495 0.08636566]
|
|
|
|
mean value: 0.11487262914190041
|
|
|
|
key: train_mcc
|
|
value: [0.39738044 0.48860092 0.47783774 0.39857698 0.38415639 0.40705044
|
|
0.44084039 0.41785771 0.44102867 0.36246703]
|
|
|
|
mean value: 0.4215796698264999
|
|
|
|
key: test_accuracy
|
|
value: [0.73333333 0.75555556 0.77777778 0.73333333 0.82222222 0.8
|
|
0.75555556 0.8 0.86363636 0.77272727]
|
|
|
|
mean value: 0.7814141414141414
|
|
|
|
key: train_accuracy
|
|
value: [0.83126551 0.85111663 0.84863524 0.83126551 0.82878412 0.83126551
|
|
0.83870968 0.8337469 0.83910891 0.82178218]
|
|
|
|
mean value: 0.8355680170994767
|
|
|
|
key: test_fscore
|
|
value: [0. 0. 0.16666667 0. 0.33333333 0.18181818
|
|
0.26666667 0. 0.5 0.16666667]
|
|
|
|
mean value: 0.16151515151515153
|
|
|
|
key: train_fscore
|
|
value: [0.40350877 0.50819672 0.49586777 0.4137931 0.37837838 0.42372881
|
|
0.45378151 0.42735043 0.45378151 0.38983051]
|
|
|
|
mean value: 0.4348217518257405
|
|
|
|
key: test_precision
|
|
value: [0. 0. 0.5 0. 1. 0.5
|
|
0.33333333 0. 1. 0.33333333]
|
|
|
|
mean value: 0.36666666666666664
|
|
|
|
key: train_precision
|
|
value: [0.79310345 0.83783784 0.83333333 0.77419355 0.80769231 0.78125
|
|
0.81818182 0.80645161 0.81818182 0.71875 ]
|
|
|
|
mean value: 0.79889757247933
|
|
|
|
key: test_recall
|
|
value: [0. 0. 0.1 0. 0.2 0.11111111
|
|
0.22222222 0. 0.33333333 0.11111111]
|
|
|
|
mean value: 0.10777777777777778
|
|
|
|
key: train_recall
|
|
value: [0.27058824 0.36470588 0.35294118 0.28235294 0.24705882 0.29069767
|
|
0.31395349 0.29069767 0.31395349 0.26744186]
|
|
|
|
mean value: 0.2994391244870041
|
|
|
|
key: test_roc_auc
|
|
value: [0.47142857 0.48571429 0.53571429 0.47142857 0.6 0.54166667
|
|
0.55555556 0.5 0.66666667 0.52698413]
|
|
|
|
mean value: 0.535515873015873
|
|
|
|
key: train_roc_auc
|
|
value: [0.62586016 0.67291898 0.66703663 0.63017018 0.61566778 0.63430783
|
|
0.64751302 0.63588511 0.64754278 0.61956999]
|
|
|
|
mean value: 0.6396472451195213
|
|
|
|
key: test_jcc
|
|
value: [0. 0. 0.09090909 0. 0.2 0.1
|
|
0.15384615 0. 0.33333333 0.09090909]
|
|
|
|
mean value: 0.0968997668997669
|
|
|
|
key: train_jcc
|
|
value: [0.25274725 0.34065934 0.32967033 0.26086957 0.23333333 0.2688172
|
|
0.29347826 0.27173913 0.29347826 0.24210526]
|
|
|
|
mean value: 0.2786897941260531
|
|
|
|
MCC on Blind test: 0.13
|
|
|
|
Accuracy on Blind test: 0.9
|
|
|
|
Model_name: SVM
|
|
Model func: SVC(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SVC(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01687837 0.01644373 0.0160346 0.01627159 0.01623106 0.01710892
|
|
0.01683617 0.01616502 0.01643872 0.01641154]
|
|
|
|
mean value: 0.016481971740722655
|
|
|
|
key: score_time
|
|
value: [0.01052117 0.01063561 0.0104959 0.0105865 0.01050854 0.0114789
|
|
0.01066828 0.01049829 0.01046586 0.01047754]
|
|
|
|
mean value: 0.01063365936279297
|
|
|
|
key: test_mcc
|
|
value: [ 0.26207121 0.40347329 0.26207121 0.58434871 0.66143783 0.53033009
|
|
0.43133109 -0.07537784 0.53343495 0.39561549]
|
|
|
|
mean value: 0.3988736027665821
|
|
|
|
key: train_mcc
|
|
value: [0.6356975 0.67155353 0.73221037 0.64469142 0.62655779 0.67504784
|
|
0.64864105 0.64852823 0.63074349 0.67557519]
|
|
|
|
mean value: 0.6589246406767015
|
|
|
|
key: test_accuracy
|
|
value: [0.77777778 0.82222222 0.77777778 0.86666667 0.88888889 0.86666667
|
|
0.84444444 0.77777778 0.86363636 0.81818182]
|
|
|
|
mean value: 0.8304040404040404
|
|
|
|
key: train_accuracy
|
|
value: [0.88833747 0.89826303 0.91563275 0.89081886 0.88585608 0.89826303
|
|
0.89081886 0.89081886 0.88613861 0.89851485]
|
|
|
|
mean value: 0.8943462398349016
|
|
|
|
key: test_fscore
|
|
value: [0.375 0.33333333 0.375 0.57142857 0.66666667 0.57142857
|
|
0.36363636 0. 0.5 0.5 ]
|
|
|
|
mean value: 0.4256493506493506
|
|
|
|
key: train_fscore
|
|
value: [0.66666667 0.70503597 0.76388889 0.68115942 0.65671642 0.70921986
|
|
0.68115942 0.68571429 0.66666667 0.70503597]
|
|
|
|
mean value: 0.6921263567028737
|
|
|
|
key: test_precision
|
|
value: [0.5 1. 0.5 1. 1. 0.8
|
|
1. 0. 1. 0.57142857]
|
|
|
|
mean value: 0.7371428571428571
|
|
|
|
key: train_precision
|
|
value: [0.9 0.90740741 0.93220339 0.88679245 0.89795918 0.90909091
|
|
0.90384615 0.88888889 0.88461538 0.9245283 ]
|
|
|
|
mean value: 0.9035332072069703
|
|
|
|
key: test_recall
|
|
value: [0.3 0.2 0.3 0.4 0.5 0.44444444
|
|
0.22222222 0. 0.33333333 0.44444444]
|
|
|
|
mean value: 0.3144444444444444
|
|
|
|
key: train_recall
|
|
value: [0.52941176 0.57647059 0.64705882 0.55294118 0.51764706 0.58139535
|
|
0.54651163 0.55813953 0.53488372 0.56976744]
|
|
|
|
mean value: 0.5614227086183311
|
|
|
|
key: test_roc_auc
|
|
value: [0.60714286 0.6 0.60714286 0.7 0.75 0.70833333
|
|
0.61111111 0.48611111 0.66666667 0.67936508]
|
|
|
|
mean value: 0.6415873015873016
|
|
|
|
key: train_roc_auc
|
|
value: [0.75684425 0.78037366 0.8172401 0.76703663 0.75096189 0.78281124
|
|
0.76536938 0.76960605 0.7580079 0.77859441]
|
|
|
|
mean value: 0.7726845503490838
|
|
|
|
key: test_jcc
|
|
value: [0.23076923 0.2 0.23076923 0.4 0.5 0.4
|
|
0.22222222 0. 0.33333333 0.33333333]
|
|
|
|
mean value: 0.28504273504273503
|
|
|
|
key: train_jcc
|
|
value: [0.5 0.54444444 0.61797753 0.51648352 0.48888889 0.54945055
|
|
0.51648352 0.52173913 0.5 0.54444444]
|
|
|
|
mean value: 0.529991201872003
|
|
|
|
MCC on Blind test: 0.4
|
|
|
|
Accuracy on Blind test: 0.94
|
|
|
|
Model_name: MLP
|
|
Model func: MLPClassifier(max_iter=500, random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MLPClassifier(max_iter=500, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.39898419 0.51461124 0.38300133 0.62431479 0.38128495 0.57604122
|
|
0.67659616 0.69015074 0.43227696 0.37733197]
|
|
|
|
mean value: 0.5054593563079834
|
|
|
|
key: score_time
|
|
value: [0.01308799 0.01291609 0.01255608 0.01271915 0.01317668 0.01304269
|
|
0.01358128 0.01243114 0.0128274 0.01230597]
|
|
|
|
mean value: 0.012864446640014649
|
|
|
|
key: test_mcc
|
|
value: [0.36689969 0.41931393 0.40089186 0.67993436 0.39652234 0.49405246
|
|
0. 0.62050523 0. 0.35103197]
|
|
|
|
mean value: 0.37291518484726255
|
|
|
|
key: train_mcc
|
|
value: [0.6511516 0.70724799 0.65735947 0.66112062 0.54062418 0.67478542
|
|
0.50556597 0.68801849 0.3427885 0.56615341]
|
|
|
|
mean value: 0.5994815648349563
|
|
|
|
key: test_accuracy
|
|
value: [0.75555556 0.82222222 0.8 0.86666667 0.82222222 0.84444444
|
|
0.8 0.86666667 0.79545455 0.81818182]
|
|
|
|
mean value: 0.8191414141414142
|
|
|
|
key: train_accuracy
|
|
value: [0.88337469 0.89826303 0.89330025 0.86848635 0.86352357 0.89081886
|
|
0.85359801 0.88337469 0.81930693 0.86881188]
|
|
|
|
mean value: 0.872285826597548
|
|
|
|
key: test_fscore
|
|
value: [0.52173913 0.5 0.52631579 0.75 0.42857143 0.58823529
|
|
0. 0.7 0. 0.42857143]
|
|
|
|
mean value: 0.44434330711689707
|
|
|
|
key: train_fscore
|
|
value: [0.7251462 0.77094972 0.7114094 0.73366834 0.57364341 0.74418605
|
|
0.5203252 0.75647668 0.29126214 0.58267717]
|
|
|
|
mean value: 0.6409744303013355
|
|
|
|
key: test_precision
|
|
value: [0.46153846 0.66666667 0.55555556 0.64285714 0.75 0.625
|
|
0. 0.63636364 0. 0.6 ]
|
|
|
|
mean value: 0.4937981462981463
|
|
|
|
key: train_precision
|
|
value: [0.72093023 0.73404255 0.828125 0.64035088 0.84090909 0.74418605
|
|
0.86486486 0.68224299 0.88235294 0.90243902]
|
|
|
|
mean value: 0.7840443621449115
|
|
|
|
key: test_recall
|
|
value: [0.6 0.4 0.5 0.9 0.3 0.55555556
|
|
0. 0.77777778 0. 0.33333333]
|
|
|
|
mean value: 0.43666666666666665
|
|
|
|
key: train_recall
|
|
value: [0.72941176 0.81176471 0.62352941 0.85882353 0.43529412 0.74418605
|
|
0.37209302 0.84883721 0.1744186 0.43023256]
|
|
|
|
mean value: 0.602859097127223
|
|
|
|
key: test_roc_auc
|
|
value: [0.7 0.67142857 0.69285714 0.87857143 0.63571429 0.73611111
|
|
0.5 0.83333333 0.5 0.63809524]
|
|
|
|
mean value: 0.678611111111111
|
|
|
|
key: train_roc_auc
|
|
value: [0.82697003 0.86657418 0.79446911 0.86494636 0.70664077 0.83739271
|
|
0.67816008 0.87079084 0.58406465 0.70882697]
|
|
|
|
mean value: 0.7738835691576238
|
|
|
|
key: test_jcc
|
|
value: [0.35294118 0.33333333 0.35714286 0.6 0.27272727 0.41666667
|
|
0. 0.53846154 0. 0.27272727]
|
|
|
|
mean value: 0.3144000117529529
|
|
|
|
key: train_jcc
|
|
value: [0.56880734 0.62727273 0.55208333 0.57936508 0.40217391 0.59259259
|
|
0.35164835 0.60833333 0.17045455 0.41111111]
|
|
|
|
mean value: 0.48638423266040937
|
|
|
|
MCC on Blind test: 0.35
|
|
|
|
Accuracy on Blind test: 0.9
|
|
|
|
Model_name: Decision Tree
|
|
Model func: DecisionTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', DecisionTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02867031 0.0190258 0.02192831 0.02127004 0.01687574 0.01767182
|
|
0.02091908 0.02044082 0.01987004 0.01973128]
|
|
|
|
mean value: 0.02064032554626465
|
|
|
|
key: score_time
|
|
value: [0.01214671 0.00912452 0.00886297 0.00868011 0.00868535 0.0087533
|
|
0.00918269 0.00876474 0.00927711 0.00871444]
|
|
|
|
mean value: 0.009219193458557129
|
|
|
|
key: test_mcc
|
|
value: [0.72069583 0.73010948 0.74285714 0.86991767 0.87142857 0.72222222
|
|
0.78467156 0.80178373 0.85775039 0.87831007]
|
|
|
|
mean value: 0.7979746654561855
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.88888889 0.91111111 0.91111111 0.95555556 0.95555556 0.91111111
|
|
0.93333333 0.93333333 0.95454545 0.95454545]
|
|
|
|
mean value: 0.9309090909090909
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.7826087 0.77777778 0.8 0.88888889 0.9 0.77777778
|
|
0.82352941 0.84210526 0.875 0.9 ]
|
|
|
|
mean value: 0.8367687815019219
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.69230769 0.875 0.8 1. 0.9 0.77777778
|
|
0.875 0.8 1. 0.81818182]
|
|
|
|
mean value: 0.8538267288267288
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.9 0.7 0.8 0.8 0.9 0.77777778
|
|
0.77777778 0.88888889 0.77777778 1. ]
|
|
|
|
mean value: 0.8322222222222222
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.89285714 0.83571429 0.87142857 0.9 0.93571429 0.86111111
|
|
0.875 0.91666667 0.88888889 0.97142857]
|
|
|
|
mean value: 0.8948809523809523
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.64285714 0.63636364 0.66666667 0.8 0.81818182 0.63636364
|
|
0.7 0.72727273 0.77777778 0.81818182]
|
|
|
|
mean value: 0.7223665223665223
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.24
|
|
|
|
Accuracy on Blind test: 0.85
|
|
|
|
Model_name: Extra Trees
|
|
Model func: ExtraTreesClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreesClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.11645579 0.11334872 0.11154747 0.11285019 0.11265063 0.1126678
|
|
0.11334538 0.11394501 0.11281013 0.11332774]
|
|
|
|
mean value: 0.11329488754272461
|
|
|
|
key: score_time
|
|
value: [0.01869702 0.01798868 0.0177238 0.01793098 0.01770687 0.01763916
|
|
0.0176096 0.01786375 0.01782537 0.01779485]
|
|
|
|
mean value: 0.017878007888793946
|
|
|
|
key: test_mcc
|
|
value: [0.56660974 0.5 0.26726124 0.73379939 0.66143783 0.62103443
|
|
0.70710678 0.53033009 0.52856538 0.49137176]
|
|
|
|
mean value: 0.5607516633187313
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.84444444 0.84444444 0.75555556 0.91111111 0.88888889 0.88888889
|
|
0.91111111 0.86666667 0.86363636 0.84090909]
|
|
|
|
mean value: 0.8615656565656565
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.66666667 0.46153846 0.42105263 0.75 0.66666667 0.66666667
|
|
0.71428571 0.57142857 0.57142857 0.58823529]
|
|
|
|
mean value: 0.6077969244377913
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.63636364 1. 0.44444444 1. 1. 0.83333333
|
|
1. 0.8 0.8 0.625 ]
|
|
|
|
mean value: 0.8139141414141414
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.7 0.3 0.4 0.6 0.5 0.55555556
|
|
0.55555556 0.44444444 0.44444444 0.55555556]
|
|
|
|
mean value: 0.5055555555555555
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.79285714 0.65 0.62857143 0.8 0.75 0.76388889
|
|
0.77777778 0.70833333 0.70793651 0.73492063]
|
|
|
|
mean value: 0.7314285714285714
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.5 0.3 0.26666667 0.6 0.5 0.5
|
|
0.55555556 0.4 0.4 0.41666667]
|
|
|
|
mean value: 0.4438888888888889
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.35
|
|
|
|
Accuracy on Blind test: 0.92
|
|
|
|
Model_name: Extra Tree
|
|
Model func: ExtraTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01003838 0.01014161 0.009974 0.01003861 0.01023602 0.00976062
|
|
0.0098865 0.00992179 0.00999475 0.00985217]
|
|
|
|
mean value: 0.009984445571899415
|
|
|
|
key: score_time
|
|
value: [0.00907516 0.00905442 0.00880313 0.00874567 0.00878549 0.00877738
|
|
0.00870132 0.00864339 0.00866103 0.00882006]
|
|
|
|
mean value: 0.008806705474853516
|
|
|
|
key: test_mcc
|
|
value: [0.56660974 0.13363062 0.21809215 0.24896765 0.13363062 0.30555556
|
|
0.16666667 0.45760432 0.54966046 0.3015873 ]
|
|
|
|
mean value: 0.3082005080435639
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.84444444 0.71111111 0.68888889 0.71111111 0.71111111 0.77777778
|
|
0.73333333 0.84444444 0.86363636 0.77272727]
|
|
|
|
mean value: 0.7658585858585859
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.66666667 0.31578947 0.41666667 0.43478261 0.31578947 0.44444444
|
|
0.33333333 0.53333333 0.625 0.44444444]
|
|
|
|
mean value: 0.45302504449529624
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.63636364 0.33333333 0.35714286 0.38461538 0.33333333 0.44444444
|
|
0.33333333 0.66666667 0.71428571 0.44444444]
|
|
|
|
mean value: 0.4647963147963148
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.7 0.3 0.5 0.5 0.3 0.44444444
|
|
0.33333333 0.44444444 0.55555556 0.44444444]
|
|
|
|
mean value: 0.4522222222222222
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.79285714 0.56428571 0.62142857 0.63571429 0.56428571 0.65277778
|
|
0.58333333 0.69444444 0.74920635 0.65079365]
|
|
|
|
mean value: 0.6509126984126984
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.5 0.1875 0.26315789 0.27777778 0.1875 0.28571429
|
|
0.2 0.36363636 0.45454545 0.28571429]
|
|
|
|
mean value: 0.30055460621250096
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.22
|
|
|
|
Accuracy on Blind test: 0.82
|
|
|
|
Model_name: Random Forest
|
|
Model func: RandomForestClassifier(n_estimators=1000, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(n_estimators=1000, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.63964295 1.63628864 1.64319444 1.68942332 1.62369728 1.60509205
|
|
1.66870928 1.68308854 1.62524271 1.67988706]
|
|
|
|
mean value: 1.6494266271591187
|
|
|
|
key: score_time
|
|
value: [0.09290004 0.09242129 0.09953356 0.10000849 0.09116507 0.09121084
|
|
0.10120964 0.09084678 0.09614325 0.09951377]
|
|
|
|
mean value: 0.09549527168273926
|
|
|
|
key: test_mcc
|
|
value: [0.83862787 0.58434871 0.50799198 0.80295507 0.80295507 0.72222222
|
|
0.62469505 0.62103443 0.85775039 0.72063492]
|
|
|
|
mean value: 0.7083215709196128
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.93333333 0.86666667 0.84444444 0.93333333 0.93333333 0.91111111
|
|
0.88888889 0.88888889 0.95454545 0.90909091]
|
|
|
|
mean value: 0.9063636363636364
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.86956522 0.57142857 0.58823529 0.82352941 0.82352941 0.77777778
|
|
0.61538462 0.66666667 0.875 0.77777778]
|
|
|
|
mean value: 0.7388894744073773
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.76923077 1. 0.71428571 1. 1. 0.77777778
|
|
1. 0.83333333 1. 0.77777778]
|
|
|
|
mean value: 0.8872405372405372
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 0.4 0.5 0.7 0.7 0.77777778
|
|
0.44444444 0.55555556 0.77777778 0.77777778]
|
|
|
|
mean value: 0.6633333333333333
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.95714286 0.7 0.72142857 0.85 0.85 0.86111111
|
|
0.72222222 0.76388889 0.88888889 0.86031746]
|
|
|
|
mean value: 0.8175
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.76923077 0.4 0.41666667 0.7 0.7 0.63636364
|
|
0.44444444 0.5 0.77777778 0.63636364]
|
|
|
|
mean value: 0.598084693084693
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.26
|
|
|
|
Accuracy on Blind test: 0.89
|
|
|
|
Model_name: Random Forest2
|
|
Model func: RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000...05', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10,
|
|
oob_score=True, random_state=42))])
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
|
|
key: fit_time
|
|
value: [1.80366826 0.98793888 0.97595072 0.94995117 0.97989249 0.95853686
|
|
0.93135166 0.96895552 0.95765615 0.96688747]
|
|
|
|
mean value: 1.0480789184570312
|
|
|
|
key: score_time
|
|
value: [0.25518179 0.29706073 0.26562572 0.23141026 0.25804043 0.23505807
|
|
0.22543502 0.21989369 0.26869226 0.19831777]
|
|
|
|
mean value: 0.24547157287597657
|
|
|
|
key: test_mcc
|
|
value: [0.59030128 0.58434871 0.41931393 0.66143783 0.66143783 0.70511024
|
|
0.70710678 0.42947785 0.70609879 0.52856538]
|
|
|
|
mean value: 0.5993198611145262
|
|
|
|
key: train_mcc
|
|
value: [0.92444922 0.92444922 0.90160916 0.92455638 0.92444922 0.94850528
|
|
0.94783433 0.90993424 0.93281456 0.92527524]
|
|
|
|
mean value: 0.9263876852055687
|
|
|
|
key: test_accuracy
|
|
value: [0.86666667 0.86666667 0.82222222 0.88888889 0.88888889 0.91111111
|
|
0.91111111 0.84444444 0.90909091 0.86363636]
|
|
|
|
mean value: 0.8772727272727272
|
|
|
|
key: train_accuracy
|
|
value: [0.9751861 0.9751861 0.96774194 0.9751861 0.9751861 0.98263027
|
|
0.98263027 0.97022333 0.97772277 0.97524752]
|
|
|
|
mean value: 0.9756940520354764
|
|
|
|
key: test_fscore
|
|
value: [0.66666667 0.57142857 0.5 0.66666667 0.66666667 0.75
|
|
0.71428571 0.46153846 0.71428571 0.57142857]
|
|
|
|
mean value: 0.6282967032967033
|
|
|
|
key: train_fscore
|
|
value: [0.93902439 0.93902439 0.92121212 0.9382716 0.93902439 0.95953757
|
|
0.95857988 0.92771084 0.94545455 0.93902439]
|
|
|
|
mean value: 0.9406864129865182
|
|
|
|
key: test_precision
|
|
value: [0.75 1. 0.66666667 1. 1. 0.85714286
|
|
1. 0.75 1. 0.8 ]
|
|
|
|
mean value: 0.8823809523809524
|
|
|
|
key: train_precision
|
|
value: [0.97468354 0.97468354 0.95 0.98701299 0.97468354 0.95402299
|
|
0.97590361 0.9625 0.98734177 0.98717949]
|
|
|
|
mean value: 0.9728011482219344
|
|
|
|
key: test_recall
|
|
value: [0.6 0.4 0.4 0.5 0.5 0.66666667
|
|
0.55555556 0.33333333 0.55555556 0.44444444]
|
|
|
|
mean value: 0.4955555555555555
|
|
|
|
key: train_recall
|
|
value: [0.90588235 0.90588235 0.89411765 0.89411765 0.90588235 0.96511628
|
|
0.94186047 0.89534884 0.90697674 0.89534884]
|
|
|
|
mean value: 0.9110533515731875
|
|
|
|
key: test_roc_auc
|
|
value: [0.77142857 0.7 0.67142857 0.75 0.75 0.81944444
|
|
0.77777778 0.65277778 0.77777778 0.70793651]
|
|
|
|
mean value: 0.7378571428571429
|
|
|
|
key: train_roc_auc
|
|
value: [0.94979652 0.94979652 0.94076952 0.9454865 0.94979652 0.97624899
|
|
0.96777566 0.94294256 0.95191605 0.94610209]
|
|
|
|
mean value: 0.9520630922697395
|
|
|
|
key: test_jcc
|
|
value: [0.5 0.4 0.33333333 0.5 0.5 0.6
|
|
0.55555556 0.3 0.55555556 0.4 ]
|
|
|
|
mean value: 0.46444444444444444
|
|
|
|
key: train_jcc
|
|
value: [0.88505747 0.88505747 0.85393258 0.88372093 0.88505747 0.92222222
|
|
0.92045455 0.86516854 0.89655172 0.88505747]
|
|
|
|
mean value: 0.8882280430700233
|
|
|
|
MCC on Blind test: 0.35
|
|
|
|
Accuracy on Blind test: 0.92
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.02433014 0.01029229 0.01110339 0.01064634 0.01055527 0.0109787
|
|
0.01003981 0.01027012 0.01097775 0.01092672]
|
|
|
|
mean value: 0.012012052536010741
|
|
|
|
key: score_time
|
|
value: [0.01229501 0.00954676 0.00977492 0.009202 0.00915837 0.00908947
|
|
0.00956511 0.00953436 0.00926089 0.0096221 ]
|
|
|
|
mean value: 0.009704899787902833
|
|
|
|
key: test_mcc
|
|
value: [0.48571429 0.41931393 0.10482848 0.49135381 0.40089186 0.03904344
|
|
0.23426064 0.16666667 0.39723602 0.17349856]
|
|
|
|
mean value: 0.29128077065715524
|
|
|
|
key: train_mcc
|
|
value: [0.45932002 0.4225046 0.48796447 0.39888858 0.40529872 0.44673035
|
|
0.46070614 0.41379661 0.42515227 0.43936752]
|
|
|
|
mean value: 0.4359729278396321
|
|
|
|
key: test_accuracy
|
|
value: [0.82222222 0.82222222 0.73333333 0.84444444 0.8 0.75555556
|
|
0.8 0.73333333 0.79545455 0.77272727]
|
|
|
|
mean value: 0.7879292929292929
|
|
|
|
key: train_accuracy
|
|
value: [0.83126551 0.82630273 0.84119107 0.81885856 0.82133995 0.82878412
|
|
0.83126551 0.81885856 0.82673267 0.82920792]
|
|
|
|
mean value: 0.8273806599022184
|
|
|
|
key: test_fscore
|
|
value: [0.6 0.5 0.25 0.53333333 0.52631579 0.15384615
|
|
0.30769231 0.33333333 0.52631579 0.28571429]
|
|
|
|
mean value: 0.4016550992866782
|
|
|
|
key: train_fscore
|
|
value: [0.55844156 0.51388889 0.57894737 0.49655172 0.5 0.54304636
|
|
0.55844156 0.51655629 0.51388889 0.53061224]
|
|
|
|
mean value: 0.531037488112446
|
|
|
|
key: test_precision
|
|
value: [0.6 0.66666667 0.33333333 0.8 0.55555556 0.25
|
|
0.5 0.33333333 0.5 0.4 ]
|
|
|
|
mean value: 0.4938888888888889
|
|
|
|
key: train_precision
|
|
value: [0.62318841 0.62711864 0.65671642 0.6 0.61016949 0.63076923
|
|
0.63235294 0.6 0.63793103 0.63934426]
|
|
|
|
mean value: 0.6257590428024311
|
|
|
|
key: test_recall
|
|
value: [0.6 0.4 0.2 0.4 0.5 0.11111111
|
|
0.22222222 0.33333333 0.55555556 0.22222222]
|
|
|
|
mean value: 0.35444444444444445
|
|
|
|
key: train_recall
|
|
value: [0.50588235 0.43529412 0.51764706 0.42352941 0.42352941 0.47674419
|
|
0.5 0.45348837 0.43023256 0.45348837]
|
|
|
|
mean value: 0.46198358413132695
|
|
|
|
key: test_roc_auc
|
|
value: [0.74285714 0.67142857 0.54285714 0.68571429 0.69285714 0.51388889
|
|
0.58333333 0.58333333 0.70634921 0.56825397]
|
|
|
|
mean value: 0.6290873015873015
|
|
|
|
key: train_roc_auc
|
|
value: [0.71206067 0.68305586 0.72266001 0.67402886 0.67560118 0.7005172
|
|
0.71056782 0.68573472 0.68209741 0.69215299]
|
|
|
|
mean value: 0.693847673660013
|
|
|
|
key: test_jcc
|
|
value: [0.42857143 0.33333333 0.14285714 0.36363636 0.35714286 0.08333333
|
|
0.18181818 0.2 0.35714286 0.16666667]
|
|
|
|
mean value: 0.26145021645021643
|
|
|
|
key: train_jcc
|
|
value: [0.38738739 0.34579439 0.40740741 0.33027523 0.33333333 0.37272727
|
|
0.38738739 0.34821429 0.34579439 0.36111111]
|
|
|
|
mean value: 0.3619432199472712
|
|
|
|
MCC on Blind test: 0.33
|
|
|
|
Accuracy on Blind test: 0.89
|
|
|
|
Model_name: XGBoost
|
|
Model func: XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000...
|
|
interaction_constraints=None, learning_rate=None,
|
|
max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan,
|
|
monotone_constraints=None, n_estimators=100,
|
|
n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None,
|
|
reg_lambda=None, scale_pos_weight=None,
|
|
subsample=None, tree_method=None,
|
|
use_label_encoder=False,
|
|
validate_parameters=None, verbosity=0))])
|
|
|
|
key: fit_time
|
|
value: [0.11651206 0.06531167 0.20808911 0.05981827 0.06227994 0.05916762
|
|
0.06192207 0.06170964 0.07172799 0.06925321]
|
|
|
|
mean value: 0.08357915878295899
|
|
|
|
key: score_time
|
|
value: [0.01111245 0.01109648 0.01118135 0.01165128 0.01182032 0.01047373
|
|
0.01045132 0.01045203 0.01048446 0.01087856]
|
|
|
|
mean value: 0.010960197448730469
|
|
|
|
key: test_mcc
|
|
value: [0.79539491 1. 0.88640526 0.86991767 0.86991767 0.63936201
|
|
0.92998111 0.86111111 0.92962225 0.93503247]
|
|
|
|
mean value: 0.8716744467032971
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.91111111 1. 0.95555556 0.95555556 0.95555556 0.88888889
|
|
0.97777778 0.95555556 0.97727273 0.97727273]
|
|
|
|
mean value: 0.9554545454545454
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.83333333 1. 0.90909091 0.88888889 0.88888889 0.70588235
|
|
0.94117647 0.88888889 0.94117647 0.94736842]
|
|
|
|
mean value: 0.8944694624261188
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.71428571 1. 0.83333333 1. 1. 0.75
|
|
1. 0.88888889 1. 0.9 ]
|
|
|
|
mean value: 0.9086507936507937
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 0.8 0.8 0.66666667
|
|
0.88888889 0.88888889 0.88888889 1. ]
|
|
|
|
mean value: 0.8933333333333333
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.94285714 1. 0.97142857 0.9 0.9 0.80555556
|
|
0.94444444 0.93055556 0.94444444 0.98571429]
|
|
|
|
mean value: 0.9325
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.71428571 1. 0.83333333 0.8 0.8 0.54545455
|
|
0.88888889 0.8 0.88888889 0.9 ]
|
|
|
|
mean value: 0.8170851370851371
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.34
|
|
|
|
Accuracy on Blind test: 0.88
|
|
|
|
Model_name: LDA
|
|
Model func: LinearDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LinearDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.06666088 0.05824232 0.03658915 0.06690645 0.0736444 0.03895807
|
|
0.05812693 0.08046889 0.07247853 0.03774714]
|
|
|
|
mean value: 0.05898227691650391
|
|
|
|
key: score_time
|
|
value: [0.02602434 0.01202106 0.01201797 0.02015209 0.01300311 0.01202083
|
|
0.02322769 0.02408862 0.01204586 0.01197553]
|
|
|
|
mean value: 0.016657710075378418
|
|
|
|
key: test_mcc
|
|
value: [0.52378493 0.59030128 0.76553182 0.87142857 0.86991767 0.49405246
|
|
0.85839508 0.70511024 0.63745526 0.86031746]
|
|
|
|
mean value: 0.7176294762949031
|
|
|
|
key: train_mcc
|
|
value: [0.9480673 0.9326436 0.91837573 0.9261915 0.9261915 0.94146804
|
|
0.91906425 0.92680819 0.92613719 0.92685797]
|
|
|
|
mean value: 0.9291805278231328
|
|
|
|
key: test_accuracy
|
|
value: [0.82222222 0.86666667 0.91111111 0.95555556 0.95555556 0.84444444
|
|
0.95555556 0.91111111 0.88636364 0.95454545]
|
|
|
|
mean value: 0.9063131313131313
|
|
|
|
key: train_accuracy
|
|
value: [0.98263027 0.97766749 0.97270471 0.9751861 0.9751861 0.98014888
|
|
0.97270471 0.9751861 0.97524752 0.97524752]
|
|
|
|
mean value: 0.9761909441564504
|
|
|
|
key: test_fscore
|
|
value: [0.63636364 0.66666667 0.81818182 0.9 0.88888889 0.58823529
|
|
0.875 0.75 0.70588235 0.88888889]
|
|
|
|
mean value: 0.7718107546048723
|
|
|
|
key: train_fscore
|
|
value: [0.95906433 0.94674556 0.93567251 0.94186047 0.94186047 0.95402299
|
|
0.93641618 0.94252874 0.94186047 0.94252874]
|
|
|
|
mean value: 0.9442560444325491
|
|
|
|
key: test_precision
|
|
value: [0.58333333 0.75 0.75 0.9 1. 0.625
|
|
1. 0.85714286 0.75 0.88888889]
|
|
|
|
mean value: 0.8104365079365079
|
|
|
|
key: train_precision
|
|
value: [0.95348837 0.95238095 0.93023256 0.93103448 0.93103448 0.94318182
|
|
0.93103448 0.93181818 0.94186047 0.93181818]
|
|
|
|
mean value: 0.9377883977823833
|
|
|
|
key: test_recall
|
|
value: [0.7 0.6 0.9 0.9 0.8 0.55555556
|
|
0.77777778 0.66666667 0.66666667 0.88888889]
|
|
|
|
mean value: 0.7455555555555555
|
|
|
|
key: train_recall
|
|
value: [0.96470588 0.94117647 0.94117647 0.95294118 0.95294118 0.96511628
|
|
0.94186047 0.95348837 0.94186047 0.95348837]
|
|
|
|
mean value: 0.950875512995896
|
|
|
|
key: test_roc_auc
|
|
value: [0.77857143 0.77142857 0.90714286 0.93571429 0.9 0.73611111
|
|
0.88888889 0.81944444 0.8047619 0.93015873]
|
|
|
|
mean value: 0.8472222222222222
|
|
|
|
key: train_roc_auc
|
|
value: [0.97606363 0.96429893 0.96115427 0.96703663 0.96703663 0.9746717
|
|
0.96146651 0.96728046 0.9630686 0.96731022]
|
|
|
|
mean value: 0.9669387584224366
|
|
|
|
key: test_jcc
|
|
value: [0.46666667 0.5 0.69230769 0.81818182 0.8 0.41666667
|
|
0.77777778 0.6 0.54545455 0.8 ]
|
|
|
|
mean value: 0.6417055167055167
|
|
|
|
key: train_jcc
|
|
value: [0.92134831 0.8988764 0.87912088 0.89010989 0.89010989 0.91208791
|
|
0.88043478 0.89130435 0.89010989 0.89130435]
|
|
|
|
mean value: 0.8944806658900455
|
|
|
|
MCC on Blind test: 0.25
|
|
|
|
Accuracy on Blind test: 0.8
|
|
|
|
Model_name: Multinomial
|
|
Model func: MultinomialNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MultinomialNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01331973 0.00936556 0.00945139 0.00946188 0.00945044 0.00945139
|
|
0.00947118 0.00941181 0.01058984 0.0093739 ]
|
|
|
|
mean value: 0.009934711456298827
|
|
|
|
key: score_time
|
|
value: [0.01080084 0.00852752 0.0085578 0.0086689 0.00871801 0.00864577
|
|
0.00851727 0.00860786 0.00937247 0.00850677]
|
|
|
|
mean value: 0.008892321586608886
|
|
|
|
key: test_mcc
|
|
value: [0.56660974 0.49135381 0.10482848 0.45049308 0.50799198 0.62103443
|
|
0.53452248 0.35355339 0.78360391 0.39561549]
|
|
|
|
mean value: 0.4809606803172119
|
|
|
|
key: train_mcc
|
|
value: [0.59299969 0.54086905 0.5981201 0.52390689 0.51581016 0.54156346
|
|
0.56199847 0.55505906 0.52932639 0.59273237]
|
|
|
|
mean value: 0.5552385630947969
|
|
|
|
key: test_accuracy
|
|
value: [0.84444444 0.84444444 0.73333333 0.82222222 0.84444444 0.88888889
|
|
0.86666667 0.82222222 0.93181818 0.81818182]
|
|
|
|
mean value: 0.8416666666666667
|
|
|
|
key: train_accuracy
|
|
value: [0.87344913 0.85856079 0.87593052 0.85111663 0.85359801 0.8560794
|
|
0.86352357 0.86104218 0.8539604 0.87128713]
|
|
|
|
mean value: 0.8618547772891433
|
|
|
|
key: test_fscore
|
|
value: [0.66666667 0.53333333 0.25 0.55555556 0.58823529 0.66666667
|
|
0.5 0.42857143 0.8 0.5 ]
|
|
|
|
mean value: 0.5489028944911298
|
|
|
|
key: train_fscore
|
|
value: [0.66225166 0.61744966 0.66216216 0.61038961 0.58741259 0.62337662
|
|
0.63576159 0.63157895 0.60927152 0.66666667]
|
|
|
|
mean value: 0.6306321030017522
|
|
|
|
key: test_precision
|
|
value: [0.63636364 0.8 0.33333333 0.625 0.71428571 0.83333333
|
|
1. 0.6 1. 0.57142857]
|
|
|
|
mean value: 0.7113744588744588
|
|
|
|
key: train_precision
|
|
value: [0.75757576 0.71875 0.77777778 0.68115942 0.72413793 0.70588235
|
|
0.73846154 0.72727273 0.70769231 0.74285714]
|
|
|
|
mean value: 0.7281566955902766
|
|
|
|
key: test_recall
|
|
value: [0.7 0.4 0.2 0.5 0.5 0.55555556
|
|
0.33333333 0.33333333 0.66666667 0.44444444]
|
|
|
|
mean value: 0.4633333333333333
|
|
|
|
key: train_recall
|
|
value: [0.58823529 0.54117647 0.57647059 0.55294118 0.49411765 0.55813953
|
|
0.55813953 0.55813953 0.53488372 0.60465116]
|
|
|
|
mean value: 0.5566894664842681
|
|
|
|
key: test_roc_auc
|
|
value: [0.79285714 0.68571429 0.54285714 0.70714286 0.72142857 0.76388889
|
|
0.66666667 0.63888889 0.83333333 0.67936508]
|
|
|
|
mean value: 0.7032142857142857
|
|
|
|
key: train_roc_auc
|
|
value: [0.76896041 0.74228635 0.76622272 0.74187939 0.72190159 0.74752403
|
|
0.75225589 0.7506786 0.73756765 0.77402369]
|
|
|
|
mean value: 0.7503300317362713
|
|
|
|
key: test_jcc
|
|
value: [0.5 0.36363636 0.14285714 0.38461538 0.41666667 0.5
|
|
0.33333333 0.27272727 0.66666667 0.33333333]
|
|
|
|
mean value: 0.3913836163836164
|
|
|
|
key: train_jcc
|
|
value: [0.4950495 0.44660194 0.49494949 0.43925234 0.41584158 0.45283019
|
|
0.46601942 0.46153846 0.43809524 0.5 ]
|
|
|
|
mean value: 0.46101781680432496
|
|
|
|
MCC on Blind test: 0.41
|
|
|
|
Accuracy on Blind test: 0.93
|
|
|
|
Model_name: Passive Aggresive
|
|
Model func: PassiveAggressiveClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
PassiveAggressiveClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01941013 0.01949978 0.01897502 0.02570105 0.01601815 0.0199337
|
|
0.02093601 0.01855731 0.02492166 0.01731586]
|
|
|
|
mean value: 0.020126867294311523
|
|
|
|
key: score_time
|
|
value: [0.00912333 0.01105547 0.01100564 0.01153731 0.01151252 0.01167107
|
|
0.0116508 0.01161981 0.01158333 0.011549 ]
|
|
|
|
mean value: 0.011230826377868652
|
|
|
|
key: test_mcc
|
|
value: [0.67993436 0.5 0.61428571 0.93541435 0.74285714 0.
|
|
0.70710678 0.62103443 0.86031746 0.66613424]
|
|
|
|
mean value: 0.6327084472124558
|
|
|
|
key: train_mcc
|
|
value: [0.92298771 0.8938066 0.88582773 0.88372342 0.85227373 0.21519088
|
|
0.8565553 0.88868416 0.90440838 0.82480205]
|
|
|
|
mean value: 0.8128259980636565
|
|
|
|
key: test_accuracy
|
|
value: [0.86666667 0.84444444 0.86666667 0.97777778 0.91111111 0.8
|
|
0.91111111 0.88888889 0.95454545 0.88636364]
|
|
|
|
mean value: 0.8907575757575757
|
|
|
|
key: train_accuracy
|
|
value: [0.97270471 0.96526055 0.96277916 0.95781638 0.95037221 0.79900744
|
|
0.9528536 0.96277916 0.96782178 0.94306931]
|
|
|
|
mean value: 0.9434464290101466
|
|
|
|
key: test_fscore
|
|
value: [0.75 0.46153846 0.7 0.94736842 0.8 0.
|
|
0.71428571 0.66666667 0.88888889 0.73684211]
|
|
|
|
mean value: 0.6665590257695521
|
|
|
|
key: train_fscore
|
|
value: [0.93854749 0.91463415 0.9068323 0.90710383 0.88372093 0.10989011
|
|
0.87741935 0.9122807 0.92485549 0.85714286]
|
|
|
|
mean value: 0.8232427200836342
|
|
|
|
key: test_precision
|
|
value: [0.64285714 1. 0.7 1. 0.8 0.
|
|
1. 0.83333333 0.88888889 0.7 ]
|
|
|
|
mean value: 0.7565079365079365
|
|
|
|
key: train_precision
|
|
value: [0.89361702 0.94936709 0.96052632 0.84693878 0.87356322 1.
|
|
0.98550725 0.91764706 0.91954023 0.92 ]
|
|
|
|
mean value: 0.9266706954660071
|
|
|
|
key: test_recall
|
|
value: [0.9 0.3 0.7 0.9 0.8 0.
|
|
0.55555556 0.55555556 0.88888889 0.77777778]
|
|
|
|
mean value: 0.6377777777777778
|
|
|
|
key: train_recall
|
|
value: [0.98823529 0.88235294 0.85882353 0.97647059 0.89411765 0.05813953
|
|
0.79069767 0.90697674 0.93023256 0.80232558]
|
|
|
|
mean value: 0.8088372093023256
|
|
|
|
key: test_roc_auc
|
|
value: [0.87857143 0.65 0.80714286 0.95 0.87142857 0.5
|
|
0.77777778 0.76388889 0.93015873 0.84603175]
|
|
|
|
mean value: 0.7975
|
|
|
|
key: train_roc_auc
|
|
value: [0.97839438 0.93488716 0.92469478 0.96465039 0.92976323 0.52906977
|
|
0.89377155 0.94244736 0.95410999 0.89172883]
|
|
|
|
mean value: 0.8943517435516251
|
|
|
|
key: test_jcc
|
|
value: [0.6 0.3 0.53846154 0.9 0.66666667 0.
|
|
0.55555556 0.5 0.8 0.58333333]
|
|
|
|
mean value: 0.5444017094017094
|
|
|
|
key: train_jcc
|
|
value: [0.88421053 0.84269663 0.82954545 0.83 0.79166667 0.05813953
|
|
0.7816092 0.83870968 0.86021505 0.75 ]
|
|
|
|
mean value: 0.746679273821021
|
|
|
|
MCC on Blind test: 0.27
|
|
|
|
Accuracy on Blind test: 0.82
|
|
|
|
Model_name: Stochastic GDescent
|
|
Model func: SGDClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SGDClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.0174973 0.01909232 0.02306342 0.01732659 0.01916409 0.01731849
|
|
0.02019167 0.01679826 0.01788068 0.0204103 ]
|
|
|
|
mean value: 0.018874311447143556
|
|
|
|
key: score_time
|
|
value: [0.01155806 0.01147556 0.01182961 0.01158834 0.01158643 0.0115397
|
|
0.01162243 0.01153493 0.01162481 0.01155519]
|
|
|
|
mean value: 0.011591506004333497
|
|
|
|
key: test_mcc
|
|
value: [0.45049308 0.40347329 0.28571429 0.93541435 0.87142857 0.63936201
|
|
0.85839508 0.78467156 0.70156665 0.58167505]
|
|
|
|
mean value: 0.6512193929216087
|
|
|
|
key: train_mcc
|
|
value: [0.87019328 0.88582773 0.63983663 0.87848528 0.91140795 0.89514372
|
|
0.91788121 0.87383838 0.75258437 0.74631857]
|
|
|
|
mean value: 0.8371517114045162
|
|
|
|
key: test_accuracy
|
|
value: [0.82222222 0.82222222 0.8 0.97777778 0.95555556 0.88888889
|
|
0.95555556 0.93333333 0.88636364 0.77272727]
|
|
|
|
mean value: 0.8814646464646465
|
|
|
|
key: train_accuracy
|
|
value: [0.95781638 0.96277916 0.88833747 0.96029777 0.97022333 0.96526055
|
|
0.97270471 0.95781638 0.9009901 0.88861386]
|
|
|
|
mean value: 0.9424839692405965
|
|
|
|
key: test_fscore
|
|
value: [0.55555556 0.33333333 0.30769231 0.94736842 0.9 0.70588235
|
|
0.875 0.82352941 0.76190476 0.64285714]
|
|
|
|
mean value: 0.6853123287101616
|
|
|
|
key: train_fscore
|
|
value: [0.89308176 0.9068323 0.64566929 0.90243902 0.93023256 0.91666667
|
|
0.93491124 0.9005848 0.8019802 0.79069767]
|
|
|
|
mean value: 0.8623095510041557
|
|
|
|
key: test_precision
|
|
value: [0.625 1. 0.66666667 1. 0.9 0.75
|
|
1. 0.875 0.66666667 0.47368421]
|
|
|
|
mean value: 0.7957017543859649
|
|
|
|
key: train_precision
|
|
value: [0.95945946 0.96052632 0.97619048 0.93670886 0.91954023 0.93902439
|
|
0.95180723 0.90588235 0.69827586 0.65891473]
|
|
|
|
mean value: 0.8906329904935838
|
|
|
|
key: test_recall
|
|
value: [0.5 0.2 0.2 0.9 0.9 0.66666667
|
|
0.77777778 0.77777778 0.88888889 1. ]
|
|
|
|
mean value: 0.6811111111111111
|
|
|
|
key: train_recall
|
|
value: [0.83529412 0.85882353 0.48235294 0.87058824 0.94117647 0.89534884
|
|
0.91860465 0.89534884 0.94186047 0.98837209]
|
|
|
|
mean value: 0.8627770177838577
|
|
|
|
key: test_roc_auc
|
|
value: [0.70714286 0.6 0.58571429 0.95 0.93571429 0.80555556
|
|
0.88888889 0.875 0.88730159 0.85714286]
|
|
|
|
mean value: 0.8092460317460317
|
|
|
|
key: train_roc_auc
|
|
value: [0.91293008 0.92469478 0.73960414 0.92743248 0.95958195 0.93978798
|
|
0.95299318 0.93505612 0.91589879 0.92500366]
|
|
|
|
mean value: 0.913298315847864
|
|
|
|
key: test_jcc
|
|
value: [0.38461538 0.2 0.18181818 0.9 0.81818182 0.54545455
|
|
0.77777778 0.7 0.61538462 0.47368421]
|
|
|
|
mean value: 0.5596916533758639
|
|
|
|
key: train_jcc
|
|
value: [0.80681818 0.82954545 0.47674419 0.82222222 0.86956522 0.84615385
|
|
0.87777778 0.81914894 0.66942149 0.65384615]
|
|
|
|
mean value: 0.767124346357497
|
|
|
|
MCC on Blind test: 0.25
|
|
|
|
Accuracy on Blind test: 0.75
|
|
|
|
Model_name: AdaBoost Classifier
|
|
Model func: AdaBoostClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', AdaBoostClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.17264414 0.16704035 0.16875005 0.15653253 0.15798545 0.1655457
|
|
0.16948199 0.1682601 0.16138434 0.15674305]
|
|
|
|
mean value: 0.16443676948547364
|
|
|
|
key: score_time
|
|
value: [0.01646519 0.01662302 0.01668525 0.0154624 0.01630688 0.0166564
|
|
0.01666498 0.01674223 0.01552725 0.01522613]
|
|
|
|
mean value: 0.016235971450805665
|
|
|
|
key: test_mcc
|
|
value: [0.83862787 0.87142857 0.88640526 0.93541435 0.93541435 0.63936201
|
|
1. 0.80178373 0.92962225 0.86031746]
|
|
|
|
mean value: 0.8698375845471263
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.93333333 0.95555556 0.95555556 0.97777778 0.97777778 0.88888889
|
|
1. 0.93333333 0.97727273 0.95454545]
|
|
|
|
mean value: 0.9554040404040404
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.86956522 0.9 0.90909091 0.94736842 0.94736842 0.70588235
|
|
1. 0.84210526 0.94117647 0.88888889]
|
|
|
|
mean value: 0.8951445944163672
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.76923077 0.9 0.83333333 1. 1. 0.75
|
|
1. 0.8 1. 0.88888889]
|
|
|
|
mean value: 0.8941452991452992
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 0.9 1. 0.9 0.9 0.66666667
|
|
1. 0.88888889 0.88888889 0.88888889]
|
|
|
|
mean value: 0.9033333333333333
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.95714286 0.93571429 0.97142857 0.95 0.95 0.80555556
|
|
1. 0.91666667 0.94444444 0.93015873]
|
|
|
|
mean value: 0.9361111111111111
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.76923077 0.81818182 0.83333333 0.9 0.9 0.54545455
|
|
1. 0.72727273 0.88888889 0.8 ]
|
|
|
|
mean value: 0.8182362082362082
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.34
|
|
|
|
Accuracy on Blind test: 0.88
|
|
|
|
Model_name: Bagging Classifier
|
|
Model func: BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
BaggingClassifier(n_jobs=10, oob_score=True,
|
|
random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.05988169 0.0675571 0.06773233 0.05582237 0.05317688 0.06685781
|
|
0.06940985 0.06966257 0.04774928 0.06525612]
|
|
|
|
mean value: 0.06231060028076172
|
|
|
|
key: score_time
|
|
value: [0.025594 0.021667 0.02655983 0.02324939 0.02482367 0.02382517
|
|
0.03287601 0.03124142 0.01825094 0.03199196]
|
|
|
|
mean value: 0.026007938385009765
|
|
|
|
key: test_mcc
|
|
value: [0.79539491 0.87142857 0.88640526 0.93541435 0.87142857 0.72222222
|
|
0.92998111 0.72222222 0.92962225 0.93503247]
|
|
|
|
mean value: 0.8599151932022566
|
|
|
|
key: train_mcc
|
|
value: [0.99260466 0.98532572 0.97018128 0.97775887 0.99260466 0.98521752
|
|
0.97833911 0.97775887 0.96293777 0.97080757]
|
|
|
|
mean value: 0.9793536024068974
|
|
|
|
key: test_accuracy
|
|
value: [0.91111111 0.95555556 0.95555556 0.97777778 0.95555556 0.91111111
|
|
0.97777778 0.91111111 0.97727273 0.97727273]
|
|
|
|
mean value: 0.951010101010101
|
|
|
|
key: train_accuracy
|
|
value: [0.99751861 0.99503722 0.99007444 0.99255583 0.99751861 0.99503722
|
|
0.99255583 0.99255583 0.98762376 0.99009901]
|
|
|
|
mean value: 0.9930576370292116
|
|
|
|
key: test_fscore
|
|
value: [0.83333333 0.9 0.90909091 0.94736842 0.9 0.77777778
|
|
0.94117647 0.77777778 0.94117647 0.94736842]
|
|
|
|
mean value: 0.8875069581261532
|
|
|
|
key: train_fscore
|
|
value: [0.99415205 0.98837209 0.97647059 0.98245614 0.99415205 0.98837209
|
|
0.98285714 0.98245614 0.97076023 0.97701149]
|
|
|
|
mean value: 0.9837060019578956
|
|
|
|
key: test_precision
|
|
value: [0.71428571 0.9 0.83333333 1. 0.9 0.77777778
|
|
1. 0.77777778 1. 0.9 ]
|
|
|
|
mean value: 0.8803174603174604
|
|
|
|
key: train_precision
|
|
value: [0.98837209 0.97701149 0.97647059 0.97674419 0.98837209 0.98837209
|
|
0.96629213 0.98823529 0.97647059 0.96590909]
|
|
|
|
mean value: 0.979224965569794
|
|
|
|
key: test_recall
|
|
value: [1. 0.9 1. 0.9 0.9 0.77777778
|
|
0.88888889 0.77777778 0.88888889 1. ]
|
|
|
|
mean value: 0.9033333333333333
|
|
|
|
key: train_recall
|
|
value: [1. 1. 0.97647059 0.98823529 1. 0.98837209
|
|
1. 0.97674419 0.96511628 0.98837209]
|
|
|
|
mean value: 0.9883310533515732
|
|
|
|
key: test_roc_auc
|
|
value: [0.94285714 0.93571429 0.97142857 0.95 0.93571429 0.86111111
|
|
0.94444444 0.86111111 0.94444444 0.98571429]
|
|
|
|
mean value: 0.9332539682539682
|
|
|
|
key: train_roc_auc
|
|
value: [0.99842767 0.99685535 0.98509064 0.99097299 0.99842767 0.99260876
|
|
0.99526814 0.98679481 0.97941349 0.98946907]
|
|
|
|
mean value: 0.9913328579854308
|
|
|
|
key: test_jcc
|
|
value: [0.71428571 0.81818182 0.83333333 0.9 0.81818182 0.63636364
|
|
0.88888889 0.63636364 0.88888889 0.9 ]
|
|
|
|
mean value: 0.8034487734487734
|
|
|
|
key: train_jcc
|
|
value: [0.98837209 0.97701149 0.95402299 0.96551724 0.98837209 0.97701149
|
|
0.96629213 0.96551724 0.94318182 0.95505618]
|
|
|
|
mean value: 0.9680354778605187
|
|
|
|
MCC on Blind test: 0.25
|
|
|
|
Accuracy on Blind test: 0.86
|
|
|
|
Model_name: Gaussian Process
|
|
Model func: GaussianProcessClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianProcessClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.12402153 0.1243248 0.12455845 0.07757521 0.10467744 0.13930464
|
|
0.07433939 0.13511848 0.12343383 0.17625237]
|
|
|
|
mean value: 0.1203606128692627
|
|
|
|
key: score_time
|
|
value: [0.02283669 0.02326965 0.02301598 0.01422143 0.01421762 0.01423812
|
|
0.01998258 0.02570605 0.02681637 0.02361226]
|
|
|
|
mean value: 0.02079167366027832
|
|
|
|
key: test_mcc
|
|
value: [ 0.28203804 -0.11527808 0.15118579 0.1440976 0.1440976 0.42947785
|
|
0.42947785 0. 0.30073124 0.08636566]
|
|
|
|
mean value: 0.18521935371278014
|
|
|
|
key: train_mcc
|
|
value: [0.92490645 0.91732242 0.92490645 0.88681085 0.90972171 0.8954321
|
|
0.92557864 0.91053851 0.91811486 0.91811486]
|
|
|
|
mean value: 0.9131446842481175
|
|
|
|
key: test_accuracy
|
|
value: [0.8 0.73333333 0.75555556 0.77777778 0.77777778 0.84444444
|
|
0.84444444 0.8 0.81818182 0.77272727]
|
|
|
|
mean value: 0.7924242424242425
|
|
|
|
key: train_accuracy
|
|
value: [0.9751861 0.97270471 0.9751861 0.96277916 0.97022333 0.96526055
|
|
0.9751861 0.97022333 0.97277228 0.97277228]
|
|
|
|
mean value: 0.971229393410805
|
|
|
|
key: test_fscore
|
|
value: [0.18181818 0. 0.26666667 0.16666667 0.16666667 0.46153846
|
|
0.46153846 0. 0.2 0.16666667]
|
|
|
|
mean value: 0.20715617715617715
|
|
|
|
key: train_fscore
|
|
value: [0.9375 0.93081761 0.9375 0.90322581 0.92405063 0.91139241
|
|
0.9382716 0.925 0.93167702 0.93167702]
|
|
|
|
mean value: 0.9271112096694542
|
|
|
|
key: test_precision
|
|
value: [1. 0. 0.4 0.5 0.5 0.75
|
|
0.75 0. 1. 0.33333333]
|
|
|
|
mean value: 0.5233333333333333
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.1 0. 0.2 0.1 0.1 0.33333333
|
|
0.33333333 0. 0.11111111 0.11111111]
|
|
|
|
mean value: 0.1388888888888889
|
|
|
|
key: train_recall
|
|
value: [0.88235294 0.87058824 0.88235294 0.82352941 0.85882353 0.8372093
|
|
0.88372093 0.86046512 0.87209302 0.87209302]
|
|
|
|
mean value: 0.8643228454172367
|
|
|
|
key: test_roc_auc
|
|
value: [0.55 0.47142857 0.55714286 0.53571429 0.53571429 0.65277778
|
|
0.65277778 0.5 0.55555556 0.52698413]
|
|
|
|
mean value: 0.5538095238095239
|
|
|
|
key: train_roc_auc
|
|
value: [0.94117647 0.93529412 0.94117647 0.91176471 0.92941176 0.91860465
|
|
0.94186047 0.93023256 0.93604651 0.93604651]
|
|
|
|
mean value: 0.9321614227086183
|
|
|
|
key: test_jcc
|
|
value: [0.1 0. 0.15384615 0.09090909 0.09090909 0.3
|
|
0.3 0. 0.11111111 0.09090909]
|
|
|
|
mean value: 0.12376845376845377
|
|
|
|
key: train_jcc
|
|
value: [0.88235294 0.87058824 0.88235294 0.82352941 0.85882353 0.8372093
|
|
0.88372093 0.86046512 0.87209302 0.87209302]
|
|
|
|
mean value: 0.8643228454172367
|
|
|
|
MCC on Blind test: 0.21
|
|
|
|
Accuracy on Blind test: 0.93
|
|
|
|
Model_name: Gradient Boosting
|
|
Model func: GradientBoostingClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GradientBoostingClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.603302 0.58874249 0.59481955 0.59343886 0.59744143 0.59619856
|
|
0.59295964 0.59112787 0.59000349 0.60498905]
|
|
|
|
mean value: 0.5953022956848144
|
|
|
|
key: score_time
|
|
value: [0.00980949 0.00938582 0.00949502 0.00935316 0.00955439 0.00908589
|
|
0.00924087 0.0092442 0.00940347 0.00969481]
|
|
|
|
mean value: 0.009426712989807129
|
|
|
|
key: test_mcc
|
|
value: [0.79539491 0.93974299 0.88640526 0.93541435 0.93541435 0.72222222
|
|
0.92998111 0.86111111 0.85775039 0.80058335]
|
|
|
|
mean value: 0.8664020040387304
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.91111111 0.97777778 0.95555556 0.97777778 0.97777778 0.91111111
|
|
0.97777778 0.95555556 0.95454545 0.93181818]
|
|
|
|
mean value: 0.9530808080808081
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.83333333 0.95238095 0.90909091 0.94736842 0.94736842 0.77777778
|
|
0.94117647 0.88888889 0.875 0.84210526]
|
|
|
|
mean value: 0.8914490437323255
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.71428571 0.90909091 0.83333333 1. 1. 0.77777778
|
|
1. 0.88888889 1. 0.8 ]
|
|
|
|
mean value: 0.8923376623376623
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 0.9 0.9 0.77777778
|
|
0.88888889 0.88888889 0.77777778 0.88888889]
|
|
|
|
mean value: 0.9022222222222223
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.94285714 0.98571429 0.97142857 0.95 0.95 0.86111111
|
|
0.94444444 0.93055556 0.88888889 0.91587302]
|
|
|
|
mean value: 0.9340873015873016
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.71428571 0.90909091 0.83333333 0.9 0.9 0.63636364
|
|
0.88888889 0.8 0.77777778 0.72727273]
|
|
|
|
mean value: 0.8087012987012987
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.27
|
|
|
|
Accuracy on Blind test: 0.87
|
|
|
|
Model_name: QDA
|
|
Model func: QuadraticDiscriminantAnalysis()
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', QuadraticDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.02624226 0.02912045 0.03728676 0.02721572 0.02735496 0.02728152
|
|
0.02738667 0.0271759 0.02726722 0.03648257]
|
|
|
|
mean value: 0.02928140163421631
|
|
|
|
key: score_time
|
|
value: [0.01231456 0.01246047 0.01229692 0.0137291 0.01877546 0.01791263
|
|
0.01917887 0.01892257 0.01865339 0.01841831]
|
|
|
|
mean value: 0.016266226768493652
|
|
|
|
key: test_mcc
|
|
value: [-0.14285714 -0.14285714 0.28571429 -0.0805823 -0.01889822 -0.07537784
|
|
0. -0.13363062 0.03563483 -0.07733089]
|
|
|
|
mean value: -0.035018503423263264
|
|
|
|
key: train_mcc
|
|
value: [0.21679415 0.25716125 0.16750768 0.16750768 0.23778501 0.16626891
|
|
0.16626891 0.19223123 0.13563329 0.16632318]
|
|
|
|
mean value: 0.18734812868877815
|
|
|
|
key: test_accuracy
|
|
value: [0.71111111 0.71111111 0.8 0.75555556 0.71111111 0.77777778
|
|
0.8 0.73333333 0.75 0.77272727]
|
|
|
|
mean value: 0.7522727272727273
|
|
|
|
key: train_accuracy
|
|
value: [0.80148883 0.80645161 0.79652605 0.79652605 0.80397022 0.79404467
|
|
0.79404467 0.79652605 0.79207921 0.79455446]
|
|
|
|
mean value: 0.7976211827138049
|
|
|
|
key: test_fscore
|
|
value: [0. 0. 0.30769231 0. 0.13333333 0.
|
|
0. 0. 0.15384615 0. ]
|
|
|
|
mean value: 0.05948717948717948
|
|
|
|
key: train_fscore
|
|
value: [0.11111111 0.15217391 0.06818182 0.06818182 0.13186813 0.06741573
|
|
0.06741573 0.08888889 0.04545455 0.06741573]
|
|
|
|
mean value: 0.08681074177410279
|
|
|
|
key: test_precision
|
|
value: [0. 0. 0.66666667 0. 0.2 0.
|
|
0. 0. 0.25 0. ]
|
|
|
|
mean value: 0.11166666666666666
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0. 0. 0.2 0. 0.1 0.
|
|
0. 0. 0.11111111 0. ]
|
|
|
|
mean value: 0.04111111111111111
|
|
|
|
key: train_recall
|
|
value: [0.05882353 0.08235294 0.03529412 0.03529412 0.07058824 0.03488372
|
|
0.03488372 0.04651163 0.02325581 0.03488372]
|
|
|
|
mean value: 0.045677154582763335
|
|
|
|
key: test_roc_auc
|
|
value: [0.45714286 0.45714286 0.58571429 0.48571429 0.49285714 0.48611111
|
|
0.5 0.45833333 0.51269841 0.48571429]
|
|
|
|
mean value: 0.49214285714285716
|
|
|
|
key: train_roc_auc
|
|
value: [0.52941176 0.54117647 0.51764706 0.51764706 0.53529412 0.51744186
|
|
0.51744186 0.52325581 0.51162791 0.51744186]
|
|
|
|
mean value: 0.5228385772913817
|
|
|
|
key: test_jcc
|
|
value: [0. 0. 0.18181818 0. 0.07142857 0.
|
|
0. 0. 0.08333333 0. ]
|
|
|
|
mean value: 0.03365800865800866
|
|
|
|
key: train_jcc
|
|
value: [0.05882353 0.08235294 0.03529412 0.03529412 0.07058824 0.03488372
|
|
0.03488372 0.04651163 0.02325581 0.03488372]
|
|
|
|
mean value: 0.045677154582763335
|
|
|
|
MCC on Blind test: 0.0
|
|
|
|
Accuracy on Blind test: 0.87
|
|
|
|
Model_name: Ridge Classifier
|
|
Model func: RidgeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.03173399 0.03679562 0.03678823 0.03664756 0.03667378 0.03665328
|
|
0.03671861 0.03664446 0.03658366 0.03697801]
|
|
|
|
mean value: 0.03622171878814697
|
|
|
|
key: score_time
|
|
value: [0.01690292 0.02121735 0.02318454 0.02145791 0.02013636 0.0223453
|
|
0.02362728 0.02198172 0.02049947 0.02243876]
|
|
|
|
mean value: 0.02137916088104248
|
|
|
|
key: test_mcc
|
|
value: [0.56660974 0.73379939 0.74285714 0.93541435 0.86991767 0.72222222
|
|
0.70511024 0.70511024 0.78360391 0.86031746]
|
|
|
|
mean value: 0.7624962363625486
|
|
|
|
key: train_mcc
|
|
value: [0.88636035 0.87111155 0.89482822 0.87183415 0.89421536 0.90290464
|
|
0.8873647 0.89514372 0.88875829 0.90296821]
|
|
|
|
mean value: 0.8895489197600339
|
|
|
|
key: test_accuracy
|
|
value: [0.84444444 0.91111111 0.91111111 0.97777778 0.95555556 0.91111111
|
|
0.91111111 0.91111111 0.93181818 0.95454545]
|
|
|
|
mean value: 0.921969696969697
|
|
|
|
key: train_accuracy
|
|
value: [0.96277916 0.95781638 0.96526055 0.95781638 0.96526055 0.96774194
|
|
0.96277916 0.96526055 0.96287129 0.96782178]
|
|
|
|
mean value: 0.9635407709505441
|
|
|
|
key: test_fscore
|
|
value: [0.66666667 0.75 0.8 0.94736842 0.88888889 0.77777778
|
|
0.75 0.75 0.8 0.88888889]
|
|
|
|
mean value: 0.8019590643274854
|
|
|
|
key: train_fscore
|
|
value: [0.90909091 0.8969697 0.91666667 0.89820359 0.91566265 0.92307692
|
|
0.91017964 0.91666667 0.9122807 0.92307692]
|
|
|
|
mean value: 0.9121874371437515
|
|
|
|
key: test_precision
|
|
value: [0.63636364 1. 0.8 1. 1. 0.77777778
|
|
0.85714286 0.85714286 1. 0.88888889]
|
|
|
|
mean value: 0.8817316017316017
|
|
|
|
key: train_precision
|
|
value: [0.9375 0.925 0.92771084 0.91463415 0.9382716 0.93975904
|
|
0.9382716 0.93902439 0.91764706 0.93975904]
|
|
|
|
mean value: 0.9317577720948089
|
|
|
|
key: test_recall
|
|
value: [0.7 0.6 0.8 0.9 0.8 0.77777778
|
|
0.66666667 0.66666667 0.66666667 0.88888889]
|
|
|
|
mean value: 0.7466666666666667
|
|
|
|
key: train_recall
|
|
value: [0.88235294 0.87058824 0.90588235 0.88235294 0.89411765 0.90697674
|
|
0.88372093 0.89534884 0.90697674 0.90697674]
|
|
|
|
mean value: 0.8935294117647059
|
|
|
|
key: test_roc_auc
|
|
value: [0.79285714 0.8 0.87142857 0.95 0.9 0.86111111
|
|
0.81944444 0.81944444 0.83333333 0.93015873]
|
|
|
|
mean value: 0.8577777777777778
|
|
|
|
key: train_roc_auc
|
|
value: [0.93331484 0.92586016 0.94350721 0.93017018 0.93919719 0.94560194
|
|
0.93397403 0.93978798 0.94248208 0.94562674]
|
|
|
|
mean value: 0.9379522344024807
|
|
|
|
key: test_jcc
|
|
value: [0.5 0.6 0.66666667 0.9 0.8 0.63636364
|
|
0.6 0.6 0.66666667 0.8 ]
|
|
|
|
mean value: 0.676969696969697
|
|
|
|
key: train_jcc
|
|
value: [0.83333333 0.81318681 0.84615385 0.81521739 0.84444444 0.85714286
|
|
0.83516484 0.84615385 0.83870968 0.85714286]
|
|
|
|
mean value: 0.8386649901446536
|
|
|
|
MCC on Blind test: 0.31
|
|
|
|
Accuracy on Blind test: 0.87
|
|
|
|
Model_name: Ridge ClassifierCV
|
|
Model func: RidgeClassifierCV(cv=10)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifierCV(cv=10))])
|
|
|
|
key: fit_time
|
|
value: [0.35619187 0.26092649 0.27092147 0.26756215 0.29951382 0.26913643
|
|
0.13800073 0.30824399 0.30735469 0.29082513]
|
|
|
|
mean value: 0.27686767578125
|
|
|
|
key: score_time
|
|
value: [0.02232933 0.02008557 0.02005363 0.01967502 0.0214901 0.02114534
|
|
0.012326 0.02117991 0.02364898 0.02586865]
|
|
|
|
mean value: 0.020780253410339355
|
|
|
|
key: test_mcc
|
|
value: [0.56660974 0.73379939 0.74285714 0.93541435 0.80295507 0.63936201
|
|
0.78467156 0.70511024 0.78360391 0.86031746]
|
|
|
|
mean value: 0.7554700871752625
|
|
|
|
key: train_mcc
|
|
value: [0.9480673 0.87111155 0.89482822 0.87183415 0.93322152 0.91906425
|
|
0.92608759 0.94087008 0.88875829 0.90296821]
|
|
|
|
mean value: 0.9096811163022863
|
|
|
|
key: test_accuracy
|
|
value: [0.84444444 0.91111111 0.91111111 0.97777778 0.93333333 0.88888889
|
|
0.93333333 0.91111111 0.93181818 0.95454545]
|
|
|
|
mean value: 0.9197474747474748
|
|
|
|
key: train_accuracy
|
|
value: [0.98263027 0.95781638 0.96526055 0.95781638 0.97766749 0.97270471
|
|
0.9751861 0.98014888 0.96287129 0.96782178]
|
|
|
|
mean value: 0.96999238385377
|
|
|
|
key: test_fscore
|
|
value: [0.66666667 0.75 0.8 0.94736842 0.82352941 0.70588235
|
|
0.82352941 0.75 0.8 0.88888889]
|
|
|
|
mean value: 0.7955865153078776
|
|
|
|
key: train_fscore
|
|
value: /home/tanu/git/LSHTM_analysis/scripts/ml/./embb_orig.py:114: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
baseline_CT.sort_values(by = ['test_mcc'], ascending = False, inplace = True)
|
|
/home/tanu/git/LSHTM_analysis/scripts/ml/./embb_orig.py:117: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
baseline_BT.sort_values(by = ['bts_mcc'], ascending = False, inplace = True)
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[0.95906433 0.8969697 0.91666667 0.89820359 0.94736842 0.93641618
|
|
0.94186047 0.95348837 0.9122807 0.92307692]
|
|
|
|
mean value: 0.9285395352000456
|
|
|
|
key: test_precision
|
|
value: [0.63636364 1. 0.8 1. 1. 0.75
|
|
0.875 0.85714286 1. 0.88888889]
|
|
|
|
mean value: 0.8807395382395382
|
|
|
|
key: train_precision
|
|
value: [0.95348837 0.925 0.92771084 0.91463415 0.94186047 0.93103448
|
|
0.94186047 0.95348837 0.91764706 0.93975904]
|
|
|
|
mean value: 0.9346483241860291
|
|
|
|
key: test_recall
|
|
value: [0.7 0.6 0.8 0.9 0.7 0.66666667
|
|
0.77777778 0.66666667 0.66666667 0.88888889]
|
|
|
|
mean value: 0.7366666666666667
|
|
|
|
key: train_recall
|
|
value: [0.96470588 0.87058824 0.90588235 0.88235294 0.95294118 0.94186047
|
|
0.94186047 0.95348837 0.90697674 0.90697674]
|
|
|
|
mean value: 0.9227633378932969
|
|
|
|
key: test_roc_auc
|
|
value: [0.79285714 0.8 0.87142857 0.95 0.85 0.80555556
|
|
0.875 0.81944444 0.83333333 0.93015873]
|
|
|
|
mean value: 0.8527777777777777
|
|
|
|
key: train_roc_auc
|
|
value: [0.97606363 0.92586016 0.94350721 0.93017018 0.96860895 0.96146651
|
|
0.9630438 0.97043504 0.94248208 0.94562674]
|
|
|
|
mean value: 0.9527264301711786
|
|
|
|
key: test_jcc
|
|
value: [0.5 0.6 0.66666667 0.9 0.7 0.54545455
|
|
0.7 0.6 0.66666667 0.8 ]
|
|
|
|
mean value: 0.6678787878787878
|
|
|
|
key: train_jcc
|
|
value: [0.92134831 0.81318681 0.84615385 0.81521739 0.9 0.88043478
|
|
0.89010989 0.91111111 0.83870968 0.85714286]
|
|
|
|
mean value: 0.8673414683643658
|
|
|
|
MCC on Blind test: 0.31
|
|
|
|
Accuracy on Blind test: 0.87
|
|
|
|
Model_name: Logistic Regression
|
|
Model func: LogisticRegression(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegression(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.04019237 0.03844786 0.04011631 0.04066586 0.04035711 0.04035902
|
|
0.03861976 0.0399363 0.040308 0.04546833]
|
|
|
|
mean value: 0.04044709205627441
|
|
|
|
key: score_time
|
|
value: [0.0153749 0.01218843 0.01524949 0.01523614 0.01521134 0.01512098
|
|
0.01511264 0.01523185 0.01524115 0.01547027]
|
|
|
|
mean value: 0.014943718910217285
|
|
|
|
key: test_mcc
|
|
value: [0.8594125 0.89282857 0.97220047 0.86237318 0.91587302 0.88880092
|
|
0.80829038 0.94440028 0.85749293 0.85749293]
|
|
|
|
mean value: 0.8859165161079694
|
|
|
|
key: train_mcc
|
|
value: [0.92793496 0.92457661 0.92148635 0.93397229 0.93397229 0.92180881
|
|
0.93098334 0.93098334 0.93098334 0.94062837]
|
|
|
|
mean value: 0.9297329700915211
|
|
|
|
key: test_accuracy
|
|
value: [0.92957746 0.94366197 0.98591549 0.92957746 0.95774648 0.94366197
|
|
0.9 0.97142857 0.92857143 0.92857143]
|
|
|
|
mean value: 0.9418712273641852
|
|
|
|
key: train_accuracy
|
|
value: [0.96377953 0.96220472 0.96062992 0.96692913 0.96692913 0.96062992
|
|
0.96540881 0.96540881 0.96540881 0.97012579]
|
|
|
|
mean value: 0.9647454563462586
|
|
|
|
key: test_fscore
|
|
value: [0.93150685 0.94736842 0.98630137 0.93150685 0.95774648 0.94444444
|
|
0.90666667 0.97222222 0.92957746 0.92753623]
|
|
|
|
mean value: 0.9434876998425146
|
|
|
|
key: train_fscore
|
|
value: [0.96423017 0.9625 0.96099844 0.96723869 0.96723869 0.96136012
|
|
0.96573209 0.96573209 0.96573209 0.97054264]
|
|
|
|
mean value: 0.9651305011094617
|
|
|
|
key: test_precision
|
|
value: [0.91891892 0.9 0.97297297 0.89473684 0.94444444 0.91891892
|
|
0.85 0.94594595 0.91666667 0.94117647]
|
|
|
|
mean value: 0.9203781180561367
|
|
|
|
key: train_precision
|
|
value: [0.95092025 0.95356037 0.95061728 0.95975232 0.95975232 0.94528875
|
|
0.95679012 0.95679012 0.95679012 0.95718654]
|
|
|
|
mean value: 0.9547448213341536
|
|
|
|
key: test_recall
|
|
value: [0.94444444 1. 1. 0.97142857 0.97142857 0.97142857
|
|
0.97142857 1. 0.94285714 0.91428571]
|
|
|
|
mean value: 0.9687301587301587
|
|
|
|
key: train_recall
|
|
value: [0.97791798 0.97160883 0.97160883 0.97484277 0.97484277 0.97798742
|
|
0.97484277 0.97484277 0.97484277 0.98427673]
|
|
|
|
mean value: 0.9757613634109081
|
|
|
|
key: test_roc_auc
|
|
value: [0.92936508 0.94285714 0.98571429 0.93015873 0.95793651 0.94404762
|
|
0.9 0.97142857 0.92857143 0.92857143]
|
|
|
|
mean value: 0.9418650793650793
|
|
|
|
key: train_roc_auc
|
|
value: [0.96380176 0.96221951 0.96064718 0.96691665 0.96691665 0.96060254
|
|
0.96540881 0.96540881 0.96540881 0.97012579]
|
|
|
|
mean value: 0.9647456500605123
|
|
|
|
key: test_jcc
|
|
value: [0.87179487 0.9 0.97297297 0.87179487 0.91891892 0.89473684
|
|
0.82926829 0.94594595 0.86842105 0.86486486]
|
|
|
|
mean value: 0.8938718633712215
|
|
|
|
key: train_jcc
|
|
value: [0.93093093 0.92771084 0.92492492 0.93655589 0.93655589 0.92559524
|
|
0.93373494 0.93373494 0.93373494 0.94277108]
|
|
|
|
mean value: 0.9326249623416387
|
|
|
|
MCC on Blind test: 0.31
|
|
|
|
Accuracy on Blind test: 0.84
|
|
|
|
Model_name: Logistic RegressionCV
|
|
Model func: LogisticRegressionCV(random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegressionCV(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.03165054 0.87435389 1.07778573 0.90897632 0.98733664 1.02753234
|
|
0.96403265 1.08114314 0.94869494 1.0607388 ]
|
|
|
|
mean value: 0.9962244987487793
|
|
|
|
key: score_time
|
|
value: [0.01225495 0.01528287 0.01541305 0.01521039 0.01537848 0.01529574
|
|
0.0152874 0.01530361 0.01544237 0.01549196]
|
|
|
|
mean value: 0.01503608226776123
|
|
|
|
key: test_mcc
|
|
value: [1. 0.91580648 0.97220047 0.86237318 0.9451949 0.88862624
|
|
0.860309 0.97182532 0.85749293 0.860309 ]
|
|
|
|
mean value: 0.9134137510457179
|
|
|
|
key: train_mcc
|
|
value: [0.98112197 0.9842961 0.98112197 0.99685535 0.98112159 0.98429564
|
|
0.99057094 0.98432053 0.95599375 0.98744091]
|
|
|
|
mean value: 0.9827138747392403
|
|
|
|
key: test_accuracy
|
|
value: [1. 0.95774648 0.98591549 0.92957746 0.97183099 0.94366197
|
|
0.92857143 0.98571429 0.92857143 0.92857143]
|
|
|
|
mean value: 0.9560160965794768
|
|
|
|
key: train_accuracy
|
|
value: [0.99055118 0.99212598 0.99055118 0.9984252 0.99055118 0.99212598
|
|
0.99528302 0.99213836 0.97798742 0.99371069]
|
|
|
|
mean value: 0.9913450205516763
|
|
|
|
key: test_fscore
|
|
value: [1. 0.95890411 0.98630137 0.93150685 0.97222222 0.94117647
|
|
0.93150685 0.98591549 0.92957746 0.92537313]
|
|
|
|
mean value: 0.9562483962967486
|
|
|
|
key: train_fscore
|
|
value: [0.99056604 0.99215071 0.99056604 0.9984252 0.99059561 0.99217527
|
|
0.99529042 0.99217527 0.97805643 0.99373041]
|
|
|
|
mean value: 0.991373139549226
|
|
|
|
key: test_precision
|
|
value: [1. 0.94594595 0.97297297 0.89473684 0.94594595 0.96969697
|
|
0.89473684 0.97222222 0.91666667 0.96875 ]
|
|
|
|
mean value: 0.948167440766125
|
|
|
|
key: train_precision
|
|
value: [0.98746082 0.9875 0.98746082 1. 0.9875 0.98753894
|
|
0.99373041 0.98753894 0.975 0.990625 ]
|
|
|
|
mean value: 0.9884354919237492
|
|
|
|
key: test_recall
|
|
value: [1. 0.97222222 1. 0.97142857 1. 0.91428571
|
|
0.97142857 1. 0.94285714 0.88571429]
|
|
|
|
mean value: 0.9657936507936508
|
|
|
|
key: train_recall
|
|
value: [0.99369085 0.99684543 0.99369085 0.99685535 0.99371069 0.99685535
|
|
0.99685535 0.99685535 0.98113208 0.99685535]
|
|
|
|
mean value: 0.9943346626192886
|
|
|
|
key: test_roc_auc
|
|
value: [1. 0.95753968 0.98571429 0.93015873 0.97222222 0.94325397
|
|
0.92857143 0.98571429 0.92857143 0.92857143]
|
|
|
|
mean value: 0.956031746031746
|
|
|
|
key: train_roc_auc
|
|
value: [0.99055612 0.9921334 0.99055612 0.99842767 0.9905462 0.99211852
|
|
0.99528302 0.99213836 0.97798742 0.99371069]
|
|
|
|
mean value: 0.9913457532289744
|
|
|
|
key: test_jcc
|
|
value: [1. 0.92105263 0.97297297 0.87179487 0.94594595 0.88888889
|
|
0.87179487 0.97222222 0.86842105 0.86111111]
|
|
|
|
mean value: 0.9174204568941411
|
|
|
|
key: train_jcc
|
|
value: [0.98130841 0.98442368 0.98130841 0.99685535 0.98136646 0.98447205
|
|
0.990625 0.98447205 0.95705521 0.98753894]
|
|
|
|
mean value: 0.9829425558894423
|
|
|
|
MCC on Blind test: 0.3
|
|
|
|
Accuracy on Blind test: 0.83
|
|
|
|
Model_name: Gaussian NB
|
|
Model func: GaussianNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01623416 0.01104116 0.01070976 0.01065731 0.01123166 0.01068211
|
|
0.01051497 0.01104212 0.0105443 0.01056933]
|
|
|
|
mean value: 0.011322689056396485
|
|
|
|
key: score_time
|
|
value: [0.01242399 0.00949788 0.00910449 0.00959778 0.00943327 0.00926232
|
|
0.00902343 0.00998402 0.00900388 0.0091567 ]
|
|
|
|
mean value: 0.009648776054382325
|
|
|
|
key: test_mcc
|
|
value: [0.66190476 0.66322499 0.91580648 0.69292162 0.69023056 0.77460317
|
|
0.72501849 0.57353933 0.74560114 0.6882472 ]
|
|
|
|
mean value: 0.7131097753821127
|
|
|
|
key: train_mcc
|
|
value: [0.78901422 0.76948357 0.75751714 0.78606733 0.76729771 0.77640873
|
|
0.78001307 0.75831545 0.75816534 0.74934607]
|
|
|
|
mean value: 0.7691628629826781
|
|
|
|
key: test_accuracy
|
|
value: [0.83098592 0.83098592 0.95774648 0.84507042 0.84507042 0.88732394
|
|
0.85714286 0.78571429 0.87142857 0.84285714]
|
|
|
|
mean value: 0.8554325955734406
|
|
|
|
key: train_accuracy
|
|
value: [0.89448819 0.88346457 0.87874016 0.89291339 0.88346457 0.88818898
|
|
0.88993711 0.87893082 0.87893082 0.87106918]
|
|
|
|
mean value: 0.8840127767047987
|
|
|
|
key: test_fscore
|
|
value: [0.83333333 0.82857143 0.95890411 0.84931507 0.84057971 0.88571429
|
|
0.86842105 0.7761194 0.86567164 0.84931507]
|
|
|
|
mean value: 0.8555945101747016
|
|
|
|
key: train_fscore
|
|
value: [0.89481947 0.87828947 0.87912088 0.89440994 0.88544892 0.88888889
|
|
0.89096573 0.88098918 0.88062016 0.87941176]
|
|
|
|
mean value: 0.8852964394905375
|
|
|
|
key: test_precision
|
|
value: [0.83333333 0.85294118 0.94594595 0.81578947 0.85294118 0.88571429
|
|
0.80487805 0.8125 0.90625 0.81578947]
|
|
|
|
mean value: 0.852608291408365
|
|
|
|
key: train_precision
|
|
value: [0.890625 0.91752577 0.875 0.88343558 0.87195122 0.8847352
|
|
0.88271605 0.8662614 0.86850153 0.82596685]
|
|
|
|
mean value: 0.8766718605462094
|
|
|
|
key: test_recall
|
|
value: [0.83333333 0.80555556 0.97222222 0.88571429 0.82857143 0.88571429
|
|
0.94285714 0.74285714 0.82857143 0.88571429]
|
|
|
|
mean value: 0.8611111111111112
|
|
|
|
key: train_recall
|
|
value: [0.89905363 0.84227129 0.88328076 0.90566038 0.89937107 0.89308176
|
|
0.89937107 0.89622642 0.89308176 0.94025157]
|
|
|
|
mean value: 0.8951649703390672
|
|
|
|
key: test_roc_auc
|
|
value: [0.83095238 0.83134921 0.95753968 0.84563492 0.84484127 0.88730159
|
|
0.85714286 0.78571429 0.87142857 0.84285714]
|
|
|
|
mean value: 0.8554761904761905
|
|
|
|
key: train_roc_auc
|
|
value: [0.89449537 0.8833998 0.8787473 0.89289328 0.88343948 0.88818126
|
|
0.88993711 0.87893082 0.87893082 0.87106918]
|
|
|
|
mean value: 0.8840024403309327
|
|
|
|
key: test_jcc
|
|
value: [0.71428571 0.70731707 0.92105263 0.73809524 0.725 0.79487179
|
|
0.76744186 0.63414634 0.76315789 0.73809524]
|
|
|
|
mean value: 0.7503463786763037
|
|
|
|
key: train_jcc
|
|
value: [0.80965909 0.7829912 0.78431373 0.80898876 0.79444444 0.8
|
|
0.80337079 0.78729282 0.7867036 0.7847769 ]
|
|
|
|
mean value: 0.7942541335426301
|
|
|
|
MCC on Blind test: 0.24
|
|
|
|
Accuracy on Blind test: 0.8
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01169538 0.0109272 0.01177239 0.01087785 0.01081991 0.01128912
|
|
0.01081252 0.01077676 0.01197672 0.01101589]
|
|
|
|
mean value: 0.011196374893188477
|
|
|
|
key: score_time
|
|
value: [0.00908971 0.0090766 0.00990677 0.00980759 0.00910473 0.00981784
|
|
0.0095067 0.00935793 0.0098989 0.0092423 ]
|
|
|
|
mean value: 0.009480905532836915
|
|
|
|
key: test_mcc
|
|
value: [0.66190476 0.66269083 0.7488124 0.66322499 0.67233796 0.67233796
|
|
0.65044364 0.57353933 0.54374562 0.65714286]
|
|
|
|
mean value: 0.6506180337894443
|
|
|
|
key: train_mcc
|
|
value: [0.68479644 0.67888233 0.68143829 0.68397368 0.69439958 0.69735937
|
|
0.68450853 0.68486298 0.69748718 0.70714906]
|
|
|
|
mean value: 0.6894857440069325
|
|
|
|
key: test_accuracy
|
|
value: [0.83098592 0.83098592 0.87323944 0.83098592 0.83098592 0.83098592
|
|
0.81428571 0.78571429 0.77142857 0.82857143]
|
|
|
|
mean value: 0.8228169014084508
|
|
|
|
key: train_accuracy
|
|
value: [0.84094488 0.83779528 0.83937008 0.84094488 0.84566929 0.84724409
|
|
0.84119497 0.84119497 0.84748428 0.85220126]
|
|
|
|
mean value: 0.8434043975635122
|
|
|
|
key: test_fscore
|
|
value: [0.83333333 0.83783784 0.88 0.83333333 0.84210526 0.84210526
|
|
0.83544304 0.79452055 0.77777778 0.82857143]
|
|
|
|
mean value: 0.8305027823089389
|
|
|
|
key: train_fscore
|
|
value: [0.84766214 0.84511278 0.84592145 0.84720121 0.85285285 0.85413534
|
|
0.84720121 0.84766214 0.85369532 0.85843373]
|
|
|
|
mean value: 0.8499878186662445
|
|
|
|
key: test_precision
|
|
value: [0.83333333 0.81578947 0.84615385 0.81081081 0.7804878 0.7804878
|
|
0.75 0.76315789 0.75675676 0.82857143]
|
|
|
|
mean value: 0.7965549153803326
|
|
|
|
key: train_precision
|
|
value: [0.81213873 0.80747126 0.8115942 0.81632653 0.81609195 0.8184438
|
|
0.81632653 0.81449275 0.82028986 0.82369942]
|
|
|
|
mean value: 0.8156875045533096
|
|
|
|
key: test_recall
|
|
value: [0.83333333 0.86111111 0.91666667 0.85714286 0.91428571 0.91428571
|
|
0.94285714 0.82857143 0.8 0.82857143]
|
|
|
|
mean value: 0.8696825396825397
|
|
|
|
key: train_recall
|
|
value: [0.88643533 0.88643533 0.88328076 0.88050314 0.89308176 0.89308176
|
|
0.88050314 0.8836478 0.88993711 0.89622642]
|
|
|
|
mean value: 0.8873132551633831
|
|
|
|
key: test_roc_auc
|
|
value: [0.83095238 0.83055556 0.87261905 0.83134921 0.83214286 0.83214286
|
|
0.81428571 0.78571429 0.77142857 0.82857143]
|
|
|
|
mean value: 0.8229761904761904
|
|
|
|
key: train_roc_auc
|
|
value: [0.84101641 0.83787175 0.83943912 0.84088249 0.84559451 0.8471718
|
|
0.84119497 0.84119497 0.84748428 0.85220126]
|
|
|
|
mean value: 0.8434051544550919
|
|
|
|
key: test_jcc
|
|
value: [0.71428571 0.72093023 0.78571429 0.71428571 0.72727273 0.72727273
|
|
0.7173913 0.65909091 0.63636364 0.70731707]
|
|
|
|
mean value: 0.7109924324362411
|
|
|
|
key: train_jcc
|
|
value: [0.73560209 0.73177083 0.73298429 0.73490814 0.7434555 0.74540682
|
|
0.73490814 0.73560209 0.74473684 0.75197889]
|
|
|
|
mean value: 0.7391353643429629
|
|
|
|
MCC on Blind test: 0.15
|
|
|
|
Accuracy on Blind test: 0.72
|
|
|
|
Model_name: K-Nearest Neighbors
|
|
Model func: KNeighborsClassifier()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', KNeighborsClassifier())])
|
|
|
|
key: fit_time
|
|
value: [0.01412988 0.01148367 0.01113725 0.01003981 0.01117635 0.01060319
|
|
0.00993705 0.01036882 0.01106143 0.0111208 ]
|
|
|
|
mean value: 0.011105823516845702
|
|
|
|
key: score_time
|
|
value: [0.03953028 0.01311302 0.01348901 0.01340008 0.01327515 0.0129149
|
|
0.01297593 0.01358891 0.01368117 0.01299906]
|
|
|
|
mean value: 0.01589674949645996
|
|
|
|
key: test_mcc
|
|
value: [0.67079854 0.8031746 0.66190476 0.6666743 0.8031746 0.77601295
|
|
0.7581754 0.6350853 0.71899664 0.6882472 ]
|
|
|
|
mean value: 0.7182244286905386
|
|
|
|
key: train_mcc
|
|
value: [0.80552086 0.82797334 0.82422094 0.81862293 0.82420007 0.82137595
|
|
0.82144141 0.8212462 0.81819282 0.8255337 ]
|
|
|
|
mean value: 0.8208328222076421
|
|
|
|
key: test_accuracy
|
|
value: [0.83098592 0.90140845 0.83098592 0.83098592 0.90140845 0.88732394
|
|
0.87142857 0.81428571 0.85714286 0.84285714]
|
|
|
|
mean value: 0.8568812877263582
|
|
|
|
key: train_accuracy
|
|
value: [0.9023622 0.91338583 0.91181102 0.90866142 0.91181102 0.91023622
|
|
0.91037736 0.91037736 0.90880503 0.91194969]
|
|
|
|
mean value: 0.9099777150497698
|
|
|
|
key: test_fscore
|
|
value: [0.84615385 0.90140845 0.83333333 0.83783784 0.90140845 0.88888889
|
|
0.88311688 0.82666667 0.86486486 0.8358209 ]
|
|
|
|
mean value: 0.861950011779316
|
|
|
|
key: train_fscore
|
|
value: [0.90432099 0.91551459 0.91331269 0.91131498 0.91358025 0.9124424
|
|
0.91217257 0.91190108 0.91049383 0.91463415]
|
|
|
|
mean value: 0.9119687530631163
|
|
|
|
key: test_precision
|
|
value: [0.78571429 0.91428571 0.83333333 0.79487179 0.88888889 0.86486486
|
|
0.80952381 0.775 0.82051282 0.875 ]
|
|
|
|
mean value: 0.8361995511995512
|
|
|
|
key: train_precision
|
|
value: [0.88519637 0.89221557 0.89665653 0.88690476 0.8969697 0.89189189
|
|
0.89425982 0.89665653 0.89393939 0.88757396]
|
|
|
|
mean value: 0.892226454132735
|
|
|
|
key: test_recall
|
|
value: [0.91666667 0.88888889 0.83333333 0.88571429 0.91428571 0.91428571
|
|
0.97142857 0.88571429 0.91428571 0.8 ]
|
|
|
|
mean value: 0.8924603174603174
|
|
|
|
key: train_recall
|
|
value: [0.92429022 0.94006309 0.93059937 0.93710692 0.93081761 0.93396226
|
|
0.93081761 0.92767296 0.92767296 0.94339623]
|
|
|
|
mean value: 0.9326399222268515
|
|
|
|
key: test_roc_auc
|
|
value: [0.8297619 0.9015873 0.83095238 0.83174603 0.9015873 0.88769841
|
|
0.87142857 0.81428571 0.85714286 0.84285714]
|
|
|
|
mean value: 0.8569047619047618
|
|
|
|
key: train_roc_auc
|
|
value: [0.90239668 0.91342777 0.91184057 0.90861655 0.91178104 0.9101988
|
|
0.91037736 0.91037736 0.90880503 0.91194969]
|
|
|
|
mean value: 0.9099770846973395
|
|
|
|
key: test_jcc
|
|
value: [0.73333333 0.82051282 0.71428571 0.72093023 0.82051282 0.8
|
|
0.79069767 0.70454545 0.76190476 0.71794872]
|
|
|
|
mean value: 0.7584671530020367
|
|
|
|
key: train_jcc
|
|
value: [0.82535211 0.84419263 0.84045584 0.83707865 0.84090909 0.83898305
|
|
0.83852691 0.83806818 0.83569405 0.84269663]
|
|
|
|
mean value: 0.8381957155339215
|
|
|
|
MCC on Blind test: 0.27
|
|
|
|
Accuracy on Blind test: 0.81
|
|
|
|
Model_name: SVM
|
|
Model func: SVC(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SVC(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02920961 0.027812 0.02740526 0.02753949 0.02701044 0.02753711
|
|
0.02794838 0.02919984 0.02685142 0.03176022]
|
|
|
|
mean value: 0.02822737693786621
|
|
|
|
key: score_time
|
|
value: [0.01313853 0.01330638 0.01325226 0.01335096 0.01337075 0.01328874
|
|
0.01332951 0.01329422 0.0130744 0.01326728]
|
|
|
|
mean value: 0.013267302513122558
|
|
|
|
key: test_mcc
|
|
value: [0.72242312 0.8031746 0.80301852 0.74940548 0.81932673 0.72329377
|
|
0.72501849 0.72501849 0.77651637 0.80032673]
|
|
|
|
mean value: 0.7647522291382015
|
|
|
|
key: train_mcc
|
|
value: [0.85899676 0.84965531 0.83790004 0.84963453 0.86168508 0.82558713
|
|
0.84947673 0.85561671 0.85617604 0.86002703]
|
|
|
|
mean value: 0.8504755361483023
|
|
|
|
key: test_accuracy
|
|
value: [0.85915493 0.90140845 0.90140845 0.87323944 0.90140845 0.85915493
|
|
0.85714286 0.85714286 0.88571429 0.9 ]
|
|
|
|
mean value: 0.8795774647887323
|
|
|
|
key: train_accuracy
|
|
value: [0.92913386 0.92440945 0.91811024 0.92440945 0.93070866 0.91181102
|
|
0.9245283 0.92767296 0.92767296 0.92924528]
|
|
|
|
mean value: 0.92477021740207
|
|
|
|
key: test_fscore
|
|
value: [0.86842105 0.90140845 0.90410959 0.87671233 0.90909091 0.86486486
|
|
0.86842105 0.86842105 0.89189189 0.89855072]
|
|
|
|
mean value: 0.8851891916892528
|
|
|
|
key: train_fscore
|
|
value: [0.93044822 0.92592593 0.9204893 0.92615385 0.93167702 0.91489362
|
|
0.92569659 0.92857143 0.92923077 0.93129771]
|
|
|
|
mean value: 0.926438442908947
|
|
|
|
key: test_precision
|
|
value: [0.825 0.91428571 0.89189189 0.84210526 0.83333333 0.82051282
|
|
0.80487805 0.80487805 0.84615385 0.91176471]
|
|
|
|
mean value: 0.8494803672778829
|
|
|
|
key: train_precision
|
|
value: [0.91212121 0.90634441 0.89317507 0.90662651 0.9202454 0.88529412
|
|
0.91158537 0.91717791 0.90963855 0.90504451]
|
|
|
|
mean value: 0.9067253064192194
|
|
|
|
key: test_recall
|
|
value: [0.91666667 0.88888889 0.91666667 0.91428571 1. 0.91428571
|
|
0.94285714 0.94285714 0.94285714 0.88571429]
|
|
|
|
mean value: 0.9265079365079365
|
|
|
|
key: train_recall
|
|
value: [0.94952681 0.94637224 0.94952681 0.94654088 0.94339623 0.94654088
|
|
0.94025157 0.94025157 0.94968553 0.9591195 ]
|
|
|
|
mean value: 0.9471212031029899
|
|
|
|
key: test_roc_auc
|
|
value: [0.85833333 0.9015873 0.90119048 0.87380952 0.90277778 0.85992063
|
|
0.85714286 0.85714286 0.88571429 0.9 ]
|
|
|
|
mean value: 0.8797619047619047
|
|
|
|
key: train_roc_auc
|
|
value: [0.92916592 0.92444398 0.91815963 0.92437454 0.93068865 0.91175624
|
|
0.9245283 0.92767296 0.92767296 0.92924528]
|
|
|
|
mean value: 0.9247708469733944
|
|
|
|
key: test_jcc
|
|
value: [0.76744186 0.82051282 0.825 0.7804878 0.83333333 0.76190476
|
|
0.76744186 0.76744186 0.80487805 0.81578947]
|
|
|
|
mean value: 0.7944231824489012
|
|
|
|
key: train_jcc
|
|
value: [0.8699422 0.86206897 0.85269122 0.86246418 0.87209302 0.84313725
|
|
0.86167147 0.86666667 0.86781609 0.87142857]
|
|
|
|
mean value: 0.8629979641508103
|
|
|
|
MCC on Blind test: 0.31
|
|
|
|
Accuracy on Blind test: 0.87
|
|
|
|
Model_name: MLP
|
|
Model func: MLPClassifier(max_iter=500, random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MLPClassifier(max_iter=500, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [2.39154792 2.55098391 2.25747824 1.25583816 2.07758808 1.38279271
|
|
2.30183673 2.15927553 1.89091611 2.27040434]
|
|
|
|
mean value: 2.053866171836853
|
|
|
|
key: score_time
|
|
value: [0.01750112 0.01835561 0.01246715 0.01239181 0.01242304 0.01238513
|
|
0.0123868 0.01239491 0.01244998 0.01243305]
|
|
|
|
mean value: 0.013518857955932616
|
|
|
|
key: test_mcc
|
|
value: [0.94365079 0.94365079 0.94511009 0.77565853 0.91885703 0.88880092
|
|
0.860309 0.91766294 0.91465912 0.8871639 ]
|
|
|
|
mean value: 0.8995523118371359
|
|
|
|
key: train_mcc
|
|
value: [0.98742126 0.99055612 0.98740154 0.95333063 0.98117981 0.96250874
|
|
0.99057094 0.9812097 0.98113208 0.99057094]
|
|
|
|
mean value: 0.9805881757757795
|
|
|
|
key: test_accuracy
|
|
value: [0.97183099 0.97183099 0.97183099 0.88732394 0.95774648 0.94366197
|
|
0.92857143 0.95714286 0.95714286 0.94285714]
|
|
|
|
mean value: 0.9489939637826962
|
|
|
|
key: train_accuracy
|
|
value: [0.99370079 0.99527559 0.99370079 0.97637795 0.99055118 0.98110236
|
|
0.99528302 0.99056604 0.99056604 0.99528302]
|
|
|
|
mean value: 0.990240677462487
|
|
|
|
key: test_fscore
|
|
value: [0.97222222 0.97222222 0.97297297 0.88235294 0.95890411 0.94444444
|
|
0.93150685 0.95890411 0.95774648 0.94117647]
|
|
|
|
mean value: 0.9492452820992958
|
|
|
|
key: train_fscore
|
|
value: [0.99371069 0.99527559 0.99369085 0.976 0.990625 0.98136646
|
|
0.99529042 0.990625 0.99056604 0.99529042]
|
|
|
|
mean value: 0.9902440479196979
|
|
|
|
key: test_precision
|
|
value: [0.97222222 0.97222222 0.94736842 0.90909091 0.92105263 0.91891892
|
|
0.89473684 0.92105263 0.94444444 0.96969697]
|
|
|
|
mean value: 0.9370806212911476
|
|
|
|
key: train_precision
|
|
value: [0.99059561 0.99371069 0.99369085 0.99348534 0.98447205 0.96932515
|
|
0.99373041 0.98447205 0.99056604 0.99373041]
|
|
|
|
mean value: 0.9887778602399712
|
|
|
|
key: test_recall
|
|
value: [0.97222222 0.97222222 1. 0.85714286 1. 0.97142857
|
|
0.97142857 1. 0.97142857 0.91428571]
|
|
|
|
mean value: 0.963015873015873
|
|
|
|
key: train_recall
|
|
value: [0.99684543 0.99684543 0.99369085 0.9591195 0.99685535 0.99371069
|
|
0.99685535 0.99685535 0.99056604 0.99685535]
|
|
|
|
mean value: 0.9918199313532925
|
|
|
|
key: test_roc_auc
|
|
value: [0.9718254 0.9718254 0.97142857 0.88690476 0.95833333 0.94404762
|
|
0.92857143 0.95714286 0.95714286 0.94285714]
|
|
|
|
mean value: 0.9490079365079365
|
|
|
|
key: train_roc_auc
|
|
value: [0.99370573 0.99527806 0.99370077 0.97640517 0.99054124 0.98108248
|
|
0.99528302 0.99056604 0.99056604 0.99528302]
|
|
|
|
mean value: 0.9902411562803801
|
|
|
|
key: test_jcc
|
|
value: [0.94594595 0.94594595 0.94736842 0.78947368 0.92105263 0.89473684
|
|
0.87179487 0.92105263 0.91891892 0.88888889]
|
|
|
|
mean value: 0.9045178782020887
|
|
|
|
key: train_jcc
|
|
value: [0.9875 0.99059561 0.98746082 0.953125 0.98142415 0.96341463
|
|
0.990625 0.98142415 0.98130841 0.990625 ]
|
|
|
|
mean value: 0.9807502768907206
|
|
|
|
MCC on Blind test: 0.33
|
|
|
|
Accuracy on Blind test: 0.84
|
|
|
|
Model_name: Decision Tree
|
|
Model func: DecisionTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', DecisionTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.05475903 0.03686571 0.03814268 0.03290677 0.03494811 0.03826475
|
|
0.04297853 0.03771758 0.04533052 0.04267597]
|
|
|
|
mean value: 0.040458965301513675
|
|
|
|
key: score_time
|
|
value: [0.00958753 0.00902009 0.01002622 0.00923729 0.00937223 0.00918102
|
|
0.0092845 0.00953078 0.00986075 0.00986576]
|
|
|
|
mean value: 0.009496617317199706
|
|
|
|
key: test_mcc
|
|
value: [0.8594125 0.97220047 0.91587302 0.89315217 0.91885703 0.86205133
|
|
0.91766294 0.97182532 0.91465912 0.77651637]
|
|
|
|
mean value: 0.9002210259552061
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.92957746 0.98591549 0.95774648 0.94366197 0.95774648 0.92957746
|
|
0.95714286 0.98571429 0.95714286 0.88571429]
|
|
|
|
mean value: 0.9489939637826962
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.93150685 0.98630137 0.95774648 0.94594595 0.95890411 0.92537313
|
|
0.95890411 0.98550725 0.95774648 0.87878788]
|
|
|
|
mean value: 0.9486723601541638
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.91891892 0.97297297 0.97142857 0.8974359 0.92105263 0.96875
|
|
0.92105263 1. 0.94444444 0.93548387]
|
|
|
|
mean value: 0.9451539939326442
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.94444444 1. 0.94444444 1. 1. 0.88571429
|
|
1. 0.97142857 0.97142857 0.82857143]
|
|
|
|
mean value: 0.9546031746031746
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.92936508 0.98571429 0.95793651 0.94444444 0.95833333 0.92896825
|
|
0.95714286 0.98571429 0.95714286 0.88571429]
|
|
|
|
mean value: 0.949047619047619
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.87179487 0.97297297 0.91891892 0.8974359 0.92105263 0.86111111
|
|
0.92105263 0.97142857 0.91891892 0.78378378]
|
|
|
|
mean value: 0.9038470309522941
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: -0.0
|
|
|
|
Accuracy on Blind test: 0.37
|
|
|
|
Model_name: Extra Trees
|
|
Model func: ExtraTreesClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreesClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.14572811 0.14529777 0.1437459 0.13963914 0.14390397 0.1434176
|
|
0.14296389 0.14153433 0.14098859 0.13989639]
|
|
|
|
mean value: 0.14271156787872313
|
|
|
|
key: score_time
|
|
value: [0.01971769 0.01968718 0.01963449 0.01867747 0.0199399 0.02006769
|
|
0.0198648 0.01953149 0.01945806 0.0182817 ]
|
|
|
|
mean value: 0.019486045837402342
|
|
|
|
key: test_mcc
|
|
value: [0.77601295 0.97222222 0.88880092 0.85952381 0.85952381 0.8594125
|
|
0.80032673 0.94440028 0.85749293 0.8340361 ]
|
|
|
|
mean value: 0.8651752248737093
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.88732394 0.98591549 0.94366197 0.92957746 0.92957746 0.92957746
|
|
0.9 0.97142857 0.92857143 0.91428571]
|
|
|
|
mean value: 0.9319919517102616
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.88571429 0.98591549 0.94285714 0.92957746 0.92957746 0.92753623
|
|
0.90140845 0.97222222 0.92957746 0.90909091]
|
|
|
|
mean value: 0.9313477129796787
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.91176471 1. 0.97058824 0.91666667 0.91666667 0.94117647
|
|
0.88888889 0.94594595 0.91666667 0.96774194]
|
|
|
|
mean value: 0.9376106182083411
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.86111111 0.97222222 0.91666667 0.94285714 0.94285714 0.91428571
|
|
0.91428571 1. 0.94285714 0.85714286]
|
|
|
|
mean value: 0.9264285714285714
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.88769841 0.98611111 0.94404762 0.9297619 0.9297619 0.92936508
|
|
0.9 0.97142857 0.92857143 0.91428571]
|
|
|
|
mean value: 0.9321031746031746
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.79487179 0.97222222 0.89189189 0.86842105 0.86842105 0.86486486
|
|
0.82051282 0.94594595 0.86842105 0.83333333]
|
|
|
|
mean value: 0.8728906031537611
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.38
|
|
|
|
Accuracy on Blind test: 0.9
|
|
|
|
Model_name: Extra Tree
|
|
Model func: ExtraTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01223874 0.01112509 0.01053476 0.01066732 0.0108192 0.01127529
|
|
0.01141167 0.0110414 0.01176643 0.01125789]
|
|
|
|
mean value: 0.01121377944946289
|
|
|
|
key: score_time
|
|
value: [0.00919414 0.00897932 0.0089457 0.00893712 0.0091548 0.00947285
|
|
0.00935769 0.00905871 0.00959039 0.0096612 ]
|
|
|
|
mean value: 0.009235191345214843
|
|
|
|
key: test_mcc
|
|
value: [0.43764907 0.7488124 0.55043703 0.64082051 0.71917468 0.58401869
|
|
0.6614769 0.68599434 0.65821838 0.57166195]
|
|
|
|
mean value: 0.6258263944138113
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.71830986 0.87323944 0.77464789 0.81690141 0.85915493 0.78873239
|
|
0.82857143 0.84285714 0.82857143 0.78571429]
|
|
|
|
mean value: 0.8116700201207243
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.71428571 0.88 0.77142857 0.82666667 0.85294118 0.8
|
|
0.83783784 0.84507042 0.82352941 0.7826087 ]
|
|
|
|
mean value: 0.813436849664147
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.73529412 0.84615385 0.79411765 0.775 0.87878788 0.75
|
|
0.79487179 0.83333333 0.84848485 0.79411765]
|
|
|
|
mean value: 0.8050161113396408
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.69444444 0.91666667 0.75 0.88571429 0.82857143 0.85714286
|
|
0.88571429 0.85714286 0.8 0.77142857]
|
|
|
|
mean value: 0.8246825396825397
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.71865079 0.87261905 0.775 0.81785714 0.85873016 0.78968254
|
|
0.82857143 0.84285714 0.82857143 0.78571429]
|
|
|
|
mean value: 0.8118253968253969
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.55555556 0.78571429 0.62790698 0.70454545 0.74358974 0.66666667
|
|
0.72093023 0.73170732 0.7 0.64285714]
|
|
|
|
mean value: 0.6879473375304346
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.21
|
|
|
|
Accuracy on Blind test: 0.75
|
|
|
|
Model_name: Random Forest
|
|
Model func: RandomForestClassifier(n_estimators=1000, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(n_estimators=1000, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [2.34476495 2.31745625 2.32805562 2.30251503 2.34669495 2.47962403
|
|
2.42952299 2.4669199 2.41789174 2.47657204]
|
|
|
|
mean value: 2.391001749038696
|
|
|
|
key: score_time
|
|
value: [0.09617758 0.09518242 0.09966493 0.09685016 0.10369325 0.10365868
|
|
0.10375142 0.1043725 0.10385966 0.10429358]
|
|
|
|
mean value: 0.10115041732788085
|
|
|
|
key: test_mcc
|
|
value: [0.94365079 1. 0.97222222 0.9451949 0.91587302 0.97220047
|
|
0.91766294 1. 0.88571429 0.8871639 ]
|
|
|
|
mean value: 0.9439682516412394
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.97183099 1. 0.98591549 0.97183099 0.95774648 0.98591549
|
|
0.95714286 1. 0.94285714 0.94285714]
|
|
|
|
mean value: 0.9716096579476862
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.97222222 1. 0.98591549 0.97222222 0.95774648 0.98550725
|
|
0.95890411 1. 0.94285714 0.94117647]
|
|
|
|
mean value: 0.9716551385686661
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.97222222 1. 1. 0.94594595 0.94444444 1.
|
|
0.92105263 1. 0.94285714 0.96969697]
|
|
|
|
mean value: 0.9696219356745672
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.97222222 1. 0.97222222 1. 0.97142857 0.97142857
|
|
1. 1. 0.94285714 0.91428571]
|
|
|
|
mean value: 0.9744444444444444
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.9718254 1. 0.98611111 0.97222222 0.95793651 0.98571429
|
|
0.95714286 1. 0.94285714 0.94285714]
|
|
|
|
mean value: 0.9716666666666667
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.94594595 1. 0.97222222 0.94594595 0.91891892 0.97142857
|
|
0.92105263 1. 0.89189189 0.88888889]
|
|
|
|
mean value: 0.9456295016821332
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.3
|
|
|
|
Accuracy on Blind test: 0.85
|
|
|
|
Model_name: Random Forest2
|
|
Model func: RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000...05', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10,
|
|
oob_score=True, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.07728386 1.06004357 1.04589748 1.03525448 1.05557489 1.08346057
|
|
1.04775834 1.07488894 1.04446387 1.06593585]
|
|
|
|
mean value: 1.0590561866760253
|
|
|
|
key: score_time
|
|
value: [0.27090263 0.22680426 0.25386167 0.28055453 0.25919867 0.24481535
|
|
0.2539494 0.21980405 0.32444096 0.15252423]
|
|
|
|
mean value: 0.24868557453155518
|
|
|
|
key: test_mcc
|
|
value: [0.91587302 0.97222222 0.9451949 0.91587302 0.91587302 0.97220047
|
|
0.91766294 1. 0.88571429 0.89155583]
|
|
|
|
mean value: 0.933216968184954
|
|
|
|
key: train_mcc
|
|
value: [0.96547483 0.96228175 0.96858135 0.97169643 0.97169643 0.97188874
|
|
0.97174136 0.97193362 0.9749199 0.96872591]
|
|
|
|
mean value: 0.9698940307032651
|
|
|
|
key: test_accuracy
|
|
value: [0.95774648 0.98591549 0.97183099 0.95774648 0.95774648 0.98591549
|
|
0.95714286 1. 0.94285714 0.94285714]
|
|
|
|
mean value: 0.9659758551307847
|
|
|
|
key: train_accuracy
|
|
value: [0.98267717 0.98110236 0.98425197 0.98582677 0.98582677 0.98582677
|
|
0.98584906 0.98584906 0.98742138 0.98427673]
|
|
|
|
mean value: 0.9848908037438716
|
|
|
|
key: test_fscore
|
|
value: [0.95774648 0.98591549 0.97142857 0.95774648 0.95774648 0.98550725
|
|
0.95890411 1. 0.94285714 0.93939394]
|
|
|
|
mean value: 0.9657245939222971
|
|
|
|
key: train_fscore
|
|
value: [0.9827856 0.98119122 0.98432602 0.98591549 0.98591549 0.98600311
|
|
0.98591549 0.98600311 0.9875 0.98442368]
|
|
|
|
mean value: 0.9849979219608735
|
|
|
|
key: test_precision
|
|
value: [0.97142857 1. 1. 0.94444444 0.94444444 1.
|
|
0.92105263 1. 0.94285714 1. ]
|
|
|
|
mean value: 0.9724227234753551
|
|
|
|
key: train_precision
|
|
value: [0.97515528 0.97507788 0.97819315 0.98130841 0.98130841 0.97538462
|
|
0.98130841 0.97538462 0.98136646 0.97530864]
|
|
|
|
mean value: 0.9779795873557218
|
|
|
|
key: test_recall
|
|
value: [0.94444444 0.97222222 0.94444444 0.97142857 0.97142857 0.97142857
|
|
1. 1. 0.94285714 0.88571429]
|
|
|
|
mean value: 0.9603968253968254
|
|
|
|
key: train_recall
|
|
value: [0.99053628 0.9873817 0.99053628 0.99056604 0.99056604 0.99685535
|
|
0.99056604 0.99685535 0.99371069 0.99371069]
|
|
|
|
mean value: 0.9921284447354324
|
|
|
|
key: test_roc_auc
|
|
value: [0.95793651 0.98611111 0.97222222 0.95793651 0.95793651 0.98571429
|
|
0.95714286 1. 0.94285714 0.94285714]
|
|
|
|
mean value: 0.9660714285714286
|
|
|
|
key: train_roc_auc
|
|
value: [0.98268952 0.98111224 0.98426185 0.9858193 0.9858193 0.98580938
|
|
0.98584906 0.98584906 0.98742138 0.98427673]
|
|
|
|
mean value: 0.9848907803106958
|
|
|
|
key: test_jcc
|
|
value: [0.91891892 0.97222222 0.94444444 0.91891892 0.91891892 0.97142857
|
|
0.92105263 1. 0.89189189 0.88571429]
|
|
|
|
mean value: 0.934351080403712
|
|
|
|
key: train_jcc
|
|
value: [0.96615385 0.96307692 0.9691358 0.97222222 0.97222222 0.97239264
|
|
0.97222222 0.97239264 0.97530864 0.96932515]
|
|
|
|
mean value: 0.9704452309789733
|
|
|
|
MCC on Blind test: 0.32
|
|
|
|
Accuracy on Blind test: 0.85
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01238251 0.01193786 0.01179004 0.01183772 0.01118922 0.01190543
|
|
0.01156974 0.01116109 0.01178312 0.01176667]
|
|
|
|
mean value: 0.011732339859008789
|
|
|
|
key: score_time
|
|
value: [0.00994253 0.01389623 0.00982594 0.00999951 0.00992393 0.0098207
|
|
0.00968027 0.00960231 0.00980115 0.00959516]
|
|
|
|
mean value: 0.010208773612976074
|
|
|
|
key: test_mcc
|
|
value: [0.66190476 0.66269083 0.7488124 0.66322499 0.67233796 0.67233796
|
|
0.65044364 0.57353933 0.54374562 0.65714286]
|
|
|
|
mean value: 0.6506180337894443
|
|
|
|
key: train_mcc
|
|
value: [0.68479644 0.67888233 0.68143829 0.68397368 0.69439958 0.69735937
|
|
0.68450853 0.68486298 0.69748718 0.70714906]
|
|
|
|
mean value: 0.6894857440069325
|
|
|
|
key: test_accuracy
|
|
value: [0.83098592 0.83098592 0.87323944 0.83098592 0.83098592 0.83098592
|
|
0.81428571 0.78571429 0.77142857 0.82857143]
|
|
|
|
mean value: 0.8228169014084508
|
|
|
|
key: train_accuracy
|
|
value: [0.84094488 0.83779528 0.83937008 0.84094488 0.84566929 0.84724409
|
|
0.84119497 0.84119497 0.84748428 0.85220126]
|
|
|
|
mean value: 0.8434043975635122
|
|
|
|
key: test_fscore
|
|
value: [0.83333333 0.83783784 0.88 0.83333333 0.84210526 0.84210526
|
|
0.83544304 0.79452055 0.77777778 0.82857143]
|
|
|
|
mean value: 0.8305027823089389
|
|
|
|
key: train_fscore
|
|
value: [0.84766214 0.84511278 0.84592145 0.84720121 0.85285285 0.85413534
|
|
0.84720121 0.84766214 0.85369532 0.85843373]
|
|
|
|
mean value: 0.8499878186662445
|
|
|
|
key: test_precision
|
|
value: [0.83333333 0.81578947 0.84615385 0.81081081 0.7804878 0.7804878
|
|
0.75 0.76315789 0.75675676 0.82857143]
|
|
|
|
mean value: 0.7965549153803326
|
|
|
|
key: train_precision
|
|
value: [0.81213873 0.80747126 0.8115942 0.81632653 0.81609195 0.8184438
|
|
0.81632653 0.81449275 0.82028986 0.82369942]
|
|
|
|
mean value: 0.8156875045533096
|
|
|
|
key: test_recall
|
|
value: [0.83333333 0.86111111 0.91666667 0.85714286 0.91428571 0.91428571
|
|
0.94285714 0.82857143 0.8 0.82857143]
|
|
|
|
mean value: 0.8696825396825397
|
|
|
|
key: train_recall
|
|
value: [0.88643533 0.88643533 0.88328076 0.88050314 0.89308176 0.89308176
|
|
0.88050314 0.8836478 0.88993711 0.89622642]
|
|
|
|
mean value: 0.8873132551633831
|
|
|
|
key: test_roc_auc
|
|
value: [0.83095238 0.83055556 0.87261905 0.83134921 0.83214286 0.83214286
|
|
0.81428571 0.78571429 0.77142857 0.82857143]
|
|
|
|
mean value: 0.8229761904761904
|
|
|
|
key: train_roc_auc
|
|
value: [0.84101641 0.83787175 0.83943912 0.84088249 0.84559451 0.8471718
|
|
0.84119497 0.84119497 0.84748428 0.85220126]
|
|
|
|
mean value: 0.8434051544550919
|
|
|
|
key: test_jcc
|
|
value: [0.71428571 0.72093023 0.78571429 0.71428571 0.72727273 0.72727273
|
|
0.7173913 0.65909091 0.63636364 0.70731707]
|
|
|
|
mean value: 0.7109924324362411
|
|
|
|
key: train_jcc
|
|
value: [0.73560209 0.73177083 0.73298429 0.73490814 0.7434555 0.74540682
|
|
0.73490814 0.73560209 0.74473684 0.75197889]
|
|
|
|
mean value: 0.7391353643429629
|
|
|
|
MCC on Blind test: 0.15
|
|
|
|
Accuracy on Blind test: 0.72
|
|
|
|
Model_name: XGBoost
|
|
Model func: XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000...
|
|
interaction_constraints=None, learning_rate=None,
|
|
max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan,
|
|
monotone_constraints=None, n_estimators=100,
|
|
n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None,
|
|
reg_lambda=None, scale_pos_weight=None,
|
|
subsample=None, tree_method=None,
|
|
use_label_encoder=False,
|
|
validate_parameters=None, verbosity=0))])
|
|
|
|
key: fit_time
|
|
value: [0.27475119 0.10039711 0.10305262 0.09833884 0.09866333 0.11542916
|
|
0.1027298 0.10382342 0.10281277 0.10204697]
|
|
|
|
mean value: 0.1202045202255249
|
|
|
|
key: score_time
|
|
value: [0.01139998 0.01204729 0.01201963 0.01107144 0.01126814 0.01200366
|
|
0.01203513 0.01206517 0.01205444 0.01162744]
|
|
|
|
mean value: 0.011759233474731446
|
|
|
|
key: test_mcc
|
|
value: [0.94511009 1. 0.9451949 0.9451949 0.91885703 0.9186708
|
|
0.91465912 1. 0.94440028 0.91465912]
|
|
|
|
mean value: 0.9446746242479909
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.97183099 1. 0.97183099 0.97183099 0.95774648 0.95774648
|
|
0.95714286 1. 0.97142857 0.95714286]
|
|
|
|
mean value: 0.9716700201207243
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.97297297 1. 0.97142857 0.97222222 0.95890411 0.95522388
|
|
0.95774648 1. 0.97222222 0.95652174]
|
|
|
|
mean value: 0.9717242197035719
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.94736842 1. 1. 0.94594595 0.92105263 1.
|
|
0.94444444 1. 0.94594595 0.97058824]
|
|
|
|
mean value: 0.9675345624262033
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 0.94444444 1. 1. 0.91428571
|
|
0.97142857 1. 1. 0.94285714]
|
|
|
|
mean value: 0.9773015873015873
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.97142857 1. 0.97222222 0.97222222 0.95833333 0.95714286
|
|
0.95714286 1. 0.97142857 0.95714286]
|
|
|
|
mean value: 0.9717063492063492
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.94736842 1. 0.94444444 0.94594595 0.92105263 0.91428571
|
|
0.91891892 1. 0.94594595 0.91666667]
|
|
|
|
mean value: 0.9454628688839215
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.02
|
|
|
|
Accuracy on Blind test: 0.45
|
|
|
|
Model_name: LDA
|
|
Model func: LinearDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LinearDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.04684234 0.08706141 0.05413485 0.07770586 0.05042696 0.12897992
|
|
0.09746456 0.07561111 0.07489705 0.05132127]
|
|
|
|
mean value: 0.07444453239440918
|
|
|
|
key: score_time
|
|
value: [0.02085185 0.01243186 0.01910663 0.01215529 0.01213169 0.04141092
|
|
0.01944542 0.01217723 0.01244926 0.01213193]
|
|
|
|
mean value: 0.017429208755493163
|
|
|
|
key: test_mcc
|
|
value: [0.94365079 0.94511009 0.97220047 0.91587302 0.91885703 0.8031746
|
|
0.94440028 0.94440028 0.860309 0.88571429]
|
|
|
|
mean value: 0.9133689851614359
|
|
|
|
key: train_mcc
|
|
value: [0.95598214 0.95598214 0.95598214 0.97177328 0.95905957 0.96228025
|
|
0.96545177 0.96541358 0.96234029 0.96863009]
|
|
|
|
mean value: 0.9622895265862006
|
|
|
|
key: test_accuracy
|
|
value: [0.97183099 0.97183099 0.98591549 0.95774648 0.95774648 0.90140845
|
|
0.97142857 0.97142857 0.92857143 0.94285714]
|
|
|
|
mean value: 0.9560764587525151
|
|
|
|
key: train_accuracy
|
|
value: [0.97795276 0.97795276 0.97795276 0.98582677 0.97952756 0.98110236
|
|
0.9827044 0.9827044 0.98113208 0.98427673]
|
|
|
|
mean value: 0.9811132570692814
|
|
|
|
key: test_fscore
|
|
value: [0.97222222 0.97297297 0.98630137 0.95774648 0.95890411 0.90140845
|
|
0.97222222 0.97222222 0.93150685 0.94285714]
|
|
|
|
mean value: 0.9568364040841371
|
|
|
|
key: train_fscore
|
|
value: [0.97805643 0.97805643 0.97805643 0.98595944 0.97959184 0.98125
|
|
0.9827856 0.98273155 0.98125 0.984375 ]
|
|
|
|
mean value: 0.9812112710773132
|
|
|
|
key: test_precision
|
|
value: [0.97222222 0.94736842 0.97297297 0.94444444 0.92105263 0.88888889
|
|
0.94594595 0.94594595 0.89473684 0.94285714]
|
|
|
|
mean value: 0.9376435458014405
|
|
|
|
key: train_precision
|
|
value: [0.97196262 0.97196262 0.97196262 0.97832817 0.97805643 0.97515528
|
|
0.97819315 0.98119122 0.97515528 0.97826087]
|
|
|
|
mean value: 0.9760228247733598
|
|
|
|
key: test_recall
|
|
value: [0.97222222 1. 1. 0.97142857 1. 0.91428571
|
|
1. 1. 0.97142857 0.94285714]
|
|
|
|
mean value: 0.9772222222222222
|
|
|
|
key: train_recall
|
|
value: [0.98422713 0.98422713 0.98422713 0.99371069 0.98113208 0.98742138
|
|
0.98742138 0.98427673 0.98742138 0.99056604]
|
|
|
|
mean value: 0.986463107354721
|
|
|
|
key: test_roc_auc
|
|
value: [0.9718254 0.97142857 0.98571429 0.95793651 0.95833333 0.9015873
|
|
0.97142857 0.97142857 0.92857143 0.94285714]
|
|
|
|
mean value: 0.9561111111111111
|
|
|
|
key: train_roc_auc
|
|
value: [0.97796262 0.97796262 0.97796262 0.98581434 0.97952503 0.9810924
|
|
0.9827044 0.9827044 0.98113208 0.98427673]
|
|
|
|
mean value: 0.9811137233894808
|
|
|
|
key: test_jcc
|
|
value: [0.94594595 0.94736842 0.97297297 0.91891892 0.92105263 0.82051282
|
|
0.94594595 0.94594595 0.87179487 0.89189189]
|
|
|
|
mean value: 0.9182350366560893
|
|
|
|
key: train_jcc
|
|
value: [0.95705521 0.95705521 0.95705521 0.97230769 0.96 0.96319018
|
|
0.96615385 0.96604938 0.96319018 0.96923077]
|
|
|
|
mean value: 0.9631287702678296
|
|
|
|
MCC on Blind test: 0.28
|
|
|
|
Accuracy on Blind test: 0.81
|
|
|
|
Model_name: Multinomial
|
|
Model func: MultinomialNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MultinomialNB())])
|
|
|
|
key: fit_time
|
|
value: [0.0149889 0.01083922 0.01063538 0.01031995 0.01051164 0.01076412
|
|
0.01019216 0.01021242 0.0103147 0.01053262]
|
|
|
|
mean value: 0.010931110382080078
|
|
|
|
key: score_time
|
|
value: [0.03157735 0.00931001 0.00917959 0.00869751 0.00879598 0.00879335
|
|
0.0088377 0.00893331 0.00896525 0.00888109]
|
|
|
|
mean value: 0.011197113990783691
|
|
|
|
key: test_mcc
|
|
value: [0.60555556 0.60620698 0.69643609 0.71961897 0.6666743 0.70470171
|
|
0.69985421 0.69282032 0.68599434 0.77269114]
|
|
|
|
mean value: 0.6850553621528965
|
|
|
|
key: train_mcc
|
|
value: [0.70866789 0.71167734 0.69303688 0.70961788 0.71676681 0.72405002
|
|
0.71571095 0.69679348 0.7030993 0.70846356]
|
|
|
|
mean value: 0.7087884107566691
|
|
|
|
key: test_accuracy
|
|
value: [0.8028169 0.8028169 0.84507042 0.85915493 0.83098592 0.84507042
|
|
0.84285714 0.84285714 0.84285714 0.88571429]
|
|
|
|
mean value: 0.8400201207243461
|
|
|
|
key: train_accuracy
|
|
value: [0.85354331 0.85511811 0.84566929 0.85354331 0.85669291 0.85984252
|
|
0.85691824 0.84748428 0.85062893 0.85220126]
|
|
|
|
mean value: 0.8531642153221414
|
|
|
|
key: test_fscore
|
|
value: [0.80555556 0.81081081 0.85714286 0.86111111 0.83783784 0.85714286
|
|
0.85714286 0.85333333 0.84057971 0.88235294]
|
|
|
|
mean value: 0.8463009871398618
|
|
|
|
key: train_fscore
|
|
value: [0.85801527 0.85932722 0.85060976 0.85972851 0.86356822 0.86736215
|
|
0.86191199 0.85280728 0.85584219 0.85970149]
|
|
|
|
mean value: 0.858887405841854
|
|
|
|
key: test_precision
|
|
value: [0.80555556 0.78947368 0.80487805 0.83783784 0.79487179 0.78571429
|
|
0.78571429 0.8 0.85294118 0.90909091]
|
|
|
|
mean value: 0.8166077578246271
|
|
|
|
key: train_precision
|
|
value: [0.83136095 0.83382789 0.82300885 0.82608696 0.8252149 0.82436261
|
|
0.83284457 0.82404692 0.82697947 0.81818182]
|
|
|
|
mean value: 0.8265914937869413
|
|
|
|
key: test_recall
|
|
value: [0.80555556 0.83333333 0.91666667 0.88571429 0.88571429 0.94285714
|
|
0.94285714 0.91428571 0.82857143 0.85714286]
|
|
|
|
mean value: 0.8812698412698412
|
|
|
|
key: train_recall
|
|
value: [0.88643533 0.88643533 0.88012618 0.89622642 0.90566038 0.91509434
|
|
0.89308176 0.8836478 0.88679245 0.90566038]
|
|
|
|
mean value: 0.8939160367438447
|
|
|
|
key: test_roc_auc
|
|
value: [0.80277778 0.80238095 0.84404762 0.85952381 0.83174603 0.84642857
|
|
0.84285714 0.84285714 0.84285714 0.88571429]
|
|
|
|
mean value: 0.8401190476190477
|
|
|
|
key: train_roc_auc
|
|
value: [0.85359502 0.85516735 0.84572347 0.85347598 0.85661568 0.85975537
|
|
0.85691824 0.84748428 0.85062893 0.85220126]
|
|
|
|
mean value: 0.8531565581413805
|
|
|
|
key: test_jcc
|
|
value: [0.6744186 0.68181818 0.75 0.75609756 0.72093023 0.75
|
|
0.75 0.74418605 0.725 0.78947368]
|
|
|
|
mean value: 0.7341924310725247
|
|
|
|
key: train_jcc
|
|
value: [0.7513369 0.75335121 0.74005305 0.75396825 0.75989446 0.76578947
|
|
0.75733333 0.74338624 0.74801061 0.7539267 ]
|
|
|
|
mean value: 0.7527050230353116
|
|
|
|
MCC on Blind test: 0.29
|
|
|
|
Accuracy on Blind test: 0.84
|
|
|
|
Model_name: Passive Aggresive
|
|
Model func: PassiveAggressiveClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
PassiveAggressiveClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02575755 0.02991819 0.03130794 0.03118706 0.03068423 0.02614307
|
|
0.03053689 0.02595377 0.0286572 0.02632809]
|
|
|
|
mean value: 0.028647398948669432
|
|
|
|
key: score_time
|
|
value: [0.01124167 0.01198244 0.01210093 0.01215649 0.01215649 0.01206732
|
|
0.0118773 0.01179934 0.01207471 0.01209283]
|
|
|
|
mean value: 0.011954951286315917
|
|
|
|
key: test_mcc
|
|
value: [0.81932673 0.91580648 0.97220047 0.89315217 0.9451949 0.86753285
|
|
0.860309 0.97182532 0.89155583 0.88571429]
|
|
|
|
mean value: 0.9022618028457218
|
|
|
|
key: train_mcc
|
|
value: [0.88425422 0.95333063 0.96539786 0.95962895 0.97177328 0.94072529
|
|
0.96863009 0.95287259 0.91501318 0.9625688 ]
|
|
|
|
mean value: 0.9474194901340667
|
|
|
|
key: test_accuracy
|
|
value: [0.90140845 0.95774648 0.98591549 0.94366197 0.97183099 0.92957746
|
|
0.92857143 0.98571429 0.94285714 0.94285714]
|
|
|
|
mean value: 0.9490140845070423
|
|
|
|
key: train_accuracy
|
|
value: [0.94015748 0.97637795 0.98267717 0.97952756 0.98582677 0.97007874
|
|
0.98427673 0.97641509 0.95597484 0.98113208]
|
|
|
|
mean value: 0.9732444411429704
|
|
|
|
key: test_fscore
|
|
value: [0.89230769 0.95890411 0.98630137 0.94594595 0.97222222 0.92307692
|
|
0.93150685 0.98591549 0.94594595 0.94285714]
|
|
|
|
mean value: 0.9484983694080742
|
|
|
|
key: train_fscore
|
|
value: [0.93708609 0.97674419 0.98273155 0.97990726 0.98595944 0.9696
|
|
0.984375 0.97630332 0.95770393 0.98136646]
|
|
|
|
mean value: 0.973177724025148
|
|
|
|
key: test_precision
|
|
value: [1. 0.94594595 0.97297297 0.8974359 0.94594595 1.
|
|
0.89473684 0.97222222 0.8974359 0.94285714]
|
|
|
|
mean value: 0.9469552866921288
|
|
|
|
key: train_precision
|
|
value: [0.98606272 0.96036585 0.978125 0.96352584 0.97832817 0.98697068
|
|
0.97826087 0.98095238 0.92151163 0.96932515]
|
|
|
|
mean value: 0.9703428296507342
|
|
|
|
key: test_recall
|
|
value: [0.80555556 0.97222222 1. 1. 1. 0.85714286
|
|
0.97142857 1. 1. 0.94285714]
|
|
|
|
mean value: 0.954920634920635
|
|
|
|
key: train_recall
|
|
value: [0.89274448 0.99369085 0.9873817 0.99685535 0.99371069 0.95283019
|
|
0.99056604 0.97169811 0.99685535 0.99371069]
|
|
|
|
mean value: 0.9770043449794655
|
|
|
|
key: test_roc_auc
|
|
value: [0.90277778 0.95753968 0.98571429 0.94444444 0.97222222 0.92857143
|
|
0.92857143 0.98571429 0.94285714 0.94285714]
|
|
|
|
mean value: 0.9491269841269842
|
|
|
|
key: train_roc_auc
|
|
value: [0.94008293 0.97640517 0.98268456 0.97950023 0.98581434 0.97010595
|
|
0.98427673 0.97641509 0.95597484 0.98113208]
|
|
|
|
mean value: 0.9732391921115806
|
|
|
|
key: test_jcc
|
|
value: [0.80555556 0.92105263 0.97297297 0.8974359 0.94594595 0.85714286
|
|
0.87179487 0.97222222 0.8974359 0.89189189]
|
|
|
|
mean value: 0.903345074397706
|
|
|
|
key: train_jcc
|
|
value: [0.88161994 0.95454545 0.96604938 0.96060606 0.97230769 0.94099379
|
|
0.96923077 0.9537037 0.91884058 0.96341463]
|
|
|
|
mean value: 0.9481312003480796
|
|
|
|
MCC on Blind test: 0.24
|
|
|
|
Accuracy on Blind test: 0.78
|
|
|
|
Model_name: Stochastic GDescent
|
|
Model func: SGDClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SGDClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02481818 0.02070785 0.02387905 0.0211482 0.02515388 0.02220917
|
|
0.01979041 0.02351236 0.02417684 0.0264678 ]
|
|
|
|
mean value: 0.023186373710632324
|
|
|
|
key: score_time
|
|
value: [0.01183772 0.01201558 0.01180005 0.0193615 0.01207042 0.01205468
|
|
0.01208997 0.01203227 0.01205897 0.01206756]
|
|
|
|
mean value: 0.012738871574401855
|
|
|
|
key: test_mcc
|
|
value: [0.89282857 0.8365327 1. 0.91587302 0.9451949 0.88730159
|
|
0.860309 0.63245553 0.81649658 0.69954392]
|
|
|
|
mean value: 0.8486535812064163
|
|
|
|
key: train_mcc
|
|
value: [0.92369491 0.8478972 0.96574383 0.96535886 0.95909844 0.96228025
|
|
0.92339931 0.66872051 0.80366328 0.80633233]
|
|
|
|
mean value: 0.8826188931457227
|
|
|
|
key: test_accuracy
|
|
value: [0.94366197 0.91549296 1. 0.95774648 0.97183099 0.94366197
|
|
0.92857143 0.78571429 0.9 0.82857143]
|
|
|
|
mean value: 0.9175251509054326
|
|
|
|
key: train_accuracy
|
|
value: [0.96062992 0.91968504 0.98267717 0.98267717 0.97952756 0.98110236
|
|
0.96069182 0.81132075 0.89308176 0.89465409]
|
|
|
|
mean value: 0.9366047640271381
|
|
|
|
key: test_fscore
|
|
value: [0.94736842 0.91176471 1. 0.95774648 0.97222222 0.94285714
|
|
0.93150685 0.72727273 0.90909091 0.79310345]
|
|
|
|
mean value: 0.9092932904842156
|
|
|
|
key: train_fscore
|
|
value: [0.96194825 0.91341256 0.98289269 0.98273155 0.97946288 0.98125
|
|
0.96194825 0.76923077 0.9031339 0.882662 ]
|
|
|
|
mean value: 0.931867285163906
|
|
|
|
key: test_precision
|
|
value: [0.9 0.96875 1. 0.94444444 0.94594595 0.94285714
|
|
0.89473684 1. 0.83333333 1. ]
|
|
|
|
mean value: 0.943006770868613
|
|
|
|
key: train_precision
|
|
value: [0.92941176 0.98897059 0.96932515 0.98119122 0.98412698 0.97515528
|
|
0.93215339 0.99009901 0.82552083 0.99604743]
|
|
|
|
mean value: 0.9572001658910778
|
|
|
|
key: test_recall
|
|
value: [1. 0.86111111 1. 0.97142857 1. 0.94285714
|
|
0.97142857 0.57142857 1. 0.65714286]
|
|
|
|
mean value: 0.8975396825396825
|
|
|
|
key: train_recall
|
|
value: [0.99684543 0.84858044 0.99684543 0.98427673 0.97484277 0.98742138
|
|
0.99371069 0.62893082 0.99685535 0.79245283]
|
|
|
|
mean value: 0.920076185941313
|
|
|
|
key: test_roc_auc
|
|
value: [0.94285714 0.91626984 1. 0.95793651 0.97222222 0.94365079
|
|
0.92857143 0.78571429 0.9 0.82857143]
|
|
|
|
mean value: 0.9175793650793651
|
|
|
|
key: train_roc_auc
|
|
value: [0.96068686 0.91957324 0.98269944 0.98267464 0.97953495 0.9810924
|
|
0.96069182 0.81132075 0.89308176 0.89465409]
|
|
|
|
mean value: 0.9366009959724619
|
|
|
|
key: test_jcc
|
|
value: [0.9 0.83783784 1. 0.91891892 0.94594595 0.89189189
|
|
0.87179487 0.57142857 0.83333333 0.65714286]
|
|
|
|
mean value: 0.8428294228294229
|
|
|
|
key: train_jcc
|
|
value: [0.92668622 0.840625 0.96636086 0.96604938 0.95975232 0.96319018
|
|
0.92668622 0.625 0.82337662 0.78996865]
|
|
|
|
mean value: 0.8787695454447503
|
|
|
|
MCC on Blind test: 0.28
|
|
|
|
Accuracy on Blind test: 0.83
|
|
|
|
Model_name: AdaBoost Classifier
|
|
Model func: AdaBoostClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', AdaBoostClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.31250501 0.27678704 0.27812409 0.27637839 0.27873325 0.27795339
|
|
0.27697086 0.27602839 0.27625799 0.27700305]
|
|
|
|
mean value: 0.280674147605896
|
|
|
|
key: score_time
|
|
value: [0.01580977 0.01576138 0.01578331 0.01573086 0.01573992 0.01587272
|
|
0.0156374 0.01558733 0.01552153 0.01558471]
|
|
|
|
mean value: 0.01570289134979248
|
|
|
|
key: test_mcc
|
|
value: [0.94511009 1. 0.97222222 0.9451949 0.9451949 0.94511009
|
|
0.94440028 1. 0.94440028 0.94440028]
|
|
|
|
mean value: 0.9586033041629676
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.97183099 1. 0.98591549 0.97183099 0.97183099 0.97183099
|
|
0.97142857 1. 0.97142857 0.97142857]
|
|
|
|
mean value: 0.9787525150905433
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.97297297 1. 0.98591549 0.97222222 0.97222222 0.97058824
|
|
0.97222222 1. 0.97222222 0.97058824]
|
|
|
|
mean value: 0.9788953825407843
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.94736842 1. 1. 0.94594595 0.94594595 1.
|
|
0.94594595 1. 0.94594595 1. ]
|
|
|
|
mean value: 0.9731152204836415
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 0.97222222 1. 1. 0.94285714
|
|
1. 1. 1. 0.94285714]
|
|
|
|
mean value: 0.9857936507936508
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.97142857 1. 0.98611111 0.97222222 0.97222222 0.97142857
|
|
0.97142857 1. 0.97142857 0.97142857]
|
|
|
|
mean value: 0.9787698412698412
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.94736842 1. 0.97222222 0.94594595 0.94594595 0.94285714
|
|
0.94594595 1. 0.94594595 0.94285714]
|
|
|
|
mean value: 0.9589088712772923
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.17
|
|
|
|
Accuracy on Blind test: 0.67
|
|
|
|
Model_name: Bagging Classifier
|
|
Model func: BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
BaggingClassifier(n_jobs=10, oob_score=True,
|
|
random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.10428452 0.10878515 0.11316752 0.08162498 0.11346102 0.10043049
|
|
0.12153935 0.10907197 0.10956621 0.11096096]
|
|
|
|
mean value: 0.10728921890258789
|
|
|
|
key: score_time
|
|
value: [0.03129196 0.04516673 0.01808453 0.03379202 0.03972292 0.03117657
|
|
0.03427434 0.03855872 0.04342604 0.03423619]
|
|
|
|
mean value: 0.03497300148010254
|
|
|
|
key: test_mcc
|
|
value: [0.94511009 0.97220047 0.9451949 0.88730159 0.91587302 0.97220047
|
|
0.91766294 1. 0.91465912 0.91465912]
|
|
|
|
mean value: 0.9384861698533029
|
|
|
|
key: train_mcc
|
|
value: [0.99685535 0.99055612 0.99370077 0.97795766 0.99370077 0.98740154
|
|
0.99686027 0.99371069 0.99686027 0.98744091]
|
|
|
|
mean value: 0.99150443665945
|
|
|
|
key: test_accuracy
|
|
value: [0.97183099 0.98591549 0.97183099 0.94366197 0.95774648 0.98591549
|
|
0.95714286 1. 0.95714286 0.95714286]
|
|
|
|
mean value: 0.9688329979879275
|
|
|
|
key: train_accuracy
|
|
value: [0.9984252 0.99527559 0.99685039 0.98897638 0.99685039 0.99370079
|
|
0.99842767 0.99685535 0.99842767 0.99371069]
|
|
|
|
mean value: 0.9957500123805278
|
|
|
|
key: test_fscore
|
|
value: [0.97297297 0.98630137 0.97142857 0.94285714 0.95774648 0.98550725
|
|
0.95890411 1. 0.95774648 0.95652174]
|
|
|
|
mean value: 0.9689986109964468
|
|
|
|
key: train_fscore
|
|
value: [0.9984252 0.99527559 0.99684543 0.98897638 0.99685535 0.99371069
|
|
0.9984252 0.99685535 0.99843014 0.99373041]
|
|
|
|
mean value: 0.9957529720530826
|
|
|
|
key: test_precision
|
|
value: [0.94736842 0.97297297 1. 0.94285714 0.94444444 1.
|
|
0.92105263 1. 0.94444444 0.97058824]
|
|
|
|
mean value: 0.9643728292644701
|
|
|
|
key: train_precision
|
|
value: [0.99685535 0.99371069 0.99684543 0.99053628 0.99685535 0.99371069
|
|
1. 0.99685535 0.9968652 0.990625 ]
|
|
|
|
mean value: 0.9952859328615434
|
|
|
|
key: test_recall
|
|
value: [1. 1. 0.94444444 0.94285714 0.97142857 0.97142857
|
|
1. 1. 0.97142857 0.94285714]
|
|
|
|
mean value: 0.9744444444444444
|
|
|
|
key: train_recall
|
|
value: [1. 0.99684543 0.99684543 0.98742138 0.99685535 0.99371069
|
|
0.99685535 0.99685535 1. 0.99685535]
|
|
|
|
mean value: 0.9962244310854512
|
|
|
|
key: test_roc_auc
|
|
value: [0.97142857 0.98571429 0.97222222 0.94365079 0.95793651 0.98571429
|
|
0.95714286 1. 0.95714286 0.95714286]
|
|
|
|
mean value: 0.9688095238095238
|
|
|
|
key: train_roc_auc
|
|
value: [0.99842767 0.99527806 0.99685039 0.98897883 0.99685039 0.99370077
|
|
0.99842767 0.99685535 0.99842767 0.99371069]
|
|
|
|
mean value: 0.9957507489633554
|
|
|
|
key: test_jcc
|
|
value: [0.94736842 0.97297297 0.94444444 0.89189189 0.91891892 0.97142857
|
|
0.92105263 1. 0.91891892 0.91666667]
|
|
|
|
mean value: 0.9403663437873964
|
|
|
|
key: train_jcc
|
|
value: [0.99685535 0.99059561 0.99371069 0.97819315 0.99373041 0.9875
|
|
0.99685535 0.99373041 0.9968652 0.98753894]
|
|
|
|
mean value: 0.9915575100969257
|
|
|
|
MCC on Blind test: 0.02
|
|
|
|
Accuracy on Blind test: 0.34
|
|
|
|
Model_name: Gaussian Process
|
|
Model func: GaussianProcessClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianProcessClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.28510761 0.28897977 0.33660936 0.29414654 0.28814769 0.2920413
|
|
0.27245522 0.30202746 0.31658602 0.28743124]
|
|
|
|
mean value: 0.29635322093963623
|
|
|
|
key: score_time
|
|
value: [0.02856183 0.02878904 0.02860594 0.02879 0.02881074 0.03043103
|
|
0.02863693 0.02879333 0.02869368 0.02869058]
|
|
|
|
mean value: 0.02888031005859375
|
|
|
|
key: test_mcc
|
|
value: [0.69023056 0.74662454 0.77601295 0.8031746 0.8365327 0.88730159
|
|
0.77651637 0.69282032 0.77651637 0.74560114]
|
|
|
|
mean value: 0.7731331136250298
|
|
|
|
key: train_mcc
|
|
value: [0.97482268 0.97480309 0.96850386 0.97165815 0.96852339 0.97165815
|
|
0.97803094 0.97170292 0.98115148 0.97170292]
|
|
|
|
mean value: 0.9732557574743983
|
|
|
|
key: test_accuracy
|
|
value: [0.84507042 0.87323944 0.88732394 0.90140845 0.91549296 0.94366197
|
|
0.88571429 0.84285714 0.88571429 0.87142857]
|
|
|
|
mean value: 0.8851911468812877
|
|
|
|
key: train_accuracy
|
|
value: [0.98740157 0.98740157 0.98425197 0.98582677 0.98425197 0.98582677
|
|
0.98899371 0.98584906 0.99056604 0.98584906]
|
|
|
|
mean value: 0.986621849155648
|
|
|
|
key: test_fscore
|
|
value: [0.84931507 0.87671233 0.88571429 0.90140845 0.91891892 0.94285714
|
|
0.89189189 0.85333333 0.89189189 0.86567164]
|
|
|
|
mean value: 0.8877714954363009
|
|
|
|
key: train_fscore
|
|
value: [0.98742138 0.9873817 0.98422713 0.98587127 0.98422713 0.98587127
|
|
0.98894155 0.98582677 0.99059561 0.98587127]
|
|
|
|
mean value: 0.9866235091671645
|
|
|
|
key: test_precision
|
|
value: [0.83783784 0.86486486 0.91176471 0.88888889 0.87179487 0.94285714
|
|
0.84615385 0.8 0.84615385 0.90625 ]
|
|
|
|
mean value: 0.8716566004433651
|
|
|
|
key: train_precision
|
|
value: [0.98432602 0.9873817 0.98422713 0.98432602 0.98734177 0.98432602
|
|
0.99365079 0.9873817 0.9875 0.98432602]
|
|
|
|
mean value: 0.9864787177315405
|
|
|
|
key: test_recall
|
|
value: [0.86111111 0.88888889 0.86111111 0.91428571 0.97142857 0.94285714
|
|
0.94285714 0.91428571 0.94285714 0.82857143]
|
|
|
|
mean value: 0.9068253968253969
|
|
|
|
key: train_recall
|
|
value: [0.99053628 0.9873817 0.98422713 0.98742138 0.98113208 0.98742138
|
|
0.98427673 0.98427673 0.99371069 0.98742138]
|
|
|
|
mean value: 0.9867805487768585
|
|
|
|
key: test_roc_auc
|
|
value: [0.84484127 0.87301587 0.88769841 0.9015873 0.91626984 0.94365079
|
|
0.88571429 0.84285714 0.88571429 0.87142857]
|
|
|
|
mean value: 0.8852777777777777
|
|
|
|
key: train_roc_auc
|
|
value: [0.9874065 0.98740154 0.98425193 0.98582426 0.98425689 0.98582426
|
|
0.98899371 0.98584906 0.99056604 0.98584906]
|
|
|
|
mean value: 0.9866223240680119
|
|
|
|
key: test_jcc
|
|
value: [0.73809524 0.7804878 0.79487179 0.82051282 0.85 0.89189189
|
|
0.80487805 0.74418605 0.80487805 0.76315789]
|
|
|
|
mean value: 0.799295958905924
|
|
|
|
key: train_jcc
|
|
value: [0.97515528 0.97507788 0.9689441 0.97213622 0.9689441 0.97213622
|
|
0.978125 0.97204969 0.98136646 0.97213622]
|
|
|
|
mean value: 0.973607117767978
|
|
|
|
MCC on Blind test: 0.3
|
|
|
|
Accuracy on Blind test: 0.83
|
|
|
|
Model_name: Gradient Boosting
|
|
Model func: GradientBoostingClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GradientBoostingClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.19937086 1.20689344 1.17018795 1.16049933 1.1688621 1.17739654
|
|
1.16944814 1.16343355 1.16277099 1.17272592]
|
|
|
|
mean value: 1.1751588821411132
|
|
|
|
key: score_time
|
|
value: [0.01009917 0.00925827 0.00936151 0.00923276 0.0095315 0.00944948
|
|
0.00931478 0.00933528 0.00949788 0.009238 ]
|
|
|
|
mean value: 0.009431862831115722
|
|
|
|
key: test_mcc
|
|
value: [0.91580648 0.94511009 0.91587302 0.91587302 0.91885703 0.94511009
|
|
0.91766294 0.94440028 0.94440028 0.8871639 ]
|
|
|
|
mean value: 0.9250257128320715
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.95774648 0.97183099 0.95774648 0.95774648 0.95774648 0.97183099
|
|
0.95714286 0.97142857 0.97142857 0.94285714]
|
|
|
|
mean value: 0.9617505030181086
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.95890411 0.97297297 0.95774648 0.95774648 0.95890411 0.97058824
|
|
0.95890411 0.97058824 0.97222222 0.94117647]
|
|
|
|
mean value: 0.9619753422885268
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.94594595 0.94736842 0.97142857 0.94444444 0.92105263 1.
|
|
0.92105263 1. 0.94594595 0.96969697]
|
|
|
|
mean value: 0.9566935561672404
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.97222222 1. 0.94444444 0.97142857 1. 0.94285714
|
|
1. 0.94285714 1. 0.91428571]
|
|
|
|
mean value: 0.9688095238095238
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.95753968 0.97142857 0.95793651 0.95793651 0.95833333 0.97142857
|
|
0.95714286 0.97142857 0.97142857 0.94285714]
|
|
|
|
mean value: 0.9617460317460317
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.92105263 0.94736842 0.91891892 0.91891892 0.92105263 0.94285714
|
|
0.92105263 0.94285714 0.94594595 0.88888889]
|
|
|
|
mean value: 0.9268913274176432
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.06
|
|
|
|
Accuracy on Blind test: 0.46
|
|
|
|
Model_name: QDA
|
|
Model func: QuadraticDiscriminantAnalysis()
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', QuadraticDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.03551579 0.03432131 0.03407979 0.03313398 0.03578806 0.0337913
|
|
0.03626204 0.03587937 0.03342652 0.03366303]
|
|
|
|
mean value: 0.034586119651794436
|
|
|
|
key: score_time
|
|
value: [0.01272178 0.01276183 0.01627135 0.01608157 0.01268482 0.01486874
|
|
0.01266551 0.01292658 0.01618767 0.01473212]
|
|
|
|
mean value: 0.014190196990966797
|
|
|
|
key: test_mcc
|
|
value: [0.8594125 0.91580648 0.91587302 0.94365079 0.94511009 0.88730159
|
|
0.91465912 0.91465912 0.85749293 0.82992752]
|
|
|
|
mean value: 0.8983893161812995
|
|
|
|
key: train_mcc
|
|
value: [0.99059552 0.98748016 0.9812781 0.98429564 0.96898308 0.981277
|
|
0.9751514 0.97208751 0.99061012 0.98439842]
|
|
|
|
mean value: 0.9816156943004664
|
|
|
|
key: test_accuracy
|
|
value: [0.92957746 0.95774648 0.95774648 0.97183099 0.97183099 0.94366197
|
|
0.95714286 0.95714286 0.92857143 0.91428571]
|
|
|
|
mean value: 0.948953722334004
|
|
|
|
key: train_accuracy
|
|
value: [0.99527559 0.99370079 0.99055118 0.99212598 0.98425197 0.99055118
|
|
0.98742138 0.98584906 0.99528302 0.99213836]
|
|
|
|
mean value: 0.9907148516812757
|
|
|
|
key: test_fscore
|
|
value: [0.93150685 0.95890411 0.95774648 0.97142857 0.97058824 0.94285714
|
|
0.95774648 0.95774648 0.92957746 0.91176471]
|
|
|
|
mean value: 0.9489866515774745
|
|
|
|
key: train_fscore
|
|
value: [0.99529042 0.99373041 0.990625 0.99217527 0.98452012 0.99065421
|
|
0.98757764 0.98604651 0.99530516 0.99219969]
|
|
|
|
mean value: 0.9908124438383492
|
|
|
|
key: test_precision
|
|
value: [0.91891892 0.94594595 0.97142857 0.97142857 1. 0.94285714
|
|
0.94444444 0.94444444 0.91666667 0.93939394]
|
|
|
|
mean value: 0.9495528645528646
|
|
|
|
key: train_precision
|
|
value: [0.990625 0.98753894 0.98142415 0.98753894 0.9695122 0.98148148
|
|
0.97546012 0.97247706 0.99065421 0.98452012]
|
|
|
|
mean value: 0.9821232223196238
|
|
|
|
key: test_recall
|
|
value: [0.94444444 0.97222222 0.94444444 0.97142857 0.94285714 0.94285714
|
|
0.97142857 0.97142857 0.94285714 0.88571429]
|
|
|
|
mean value: 0.9489682539682539
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 0.99685535 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.999685534591195
|
|
|
|
key: test_roc_auc
|
|
value: [0.92936508 0.95753968 0.95793651 0.9718254 0.97142857 0.94365079
|
|
0.95714286 0.95714286 0.92857143 0.91428571]
|
|
|
|
mean value: 0.9488888888888889
|
|
|
|
key: train_roc_auc
|
|
value: [0.99528302 0.99371069 0.99056604 0.99211852 0.98422713 0.99053628
|
|
0.98742138 0.98584906 0.99528302 0.99213836]
|
|
|
|
mean value: 0.9907133503958098
|
|
|
|
key: test_jcc
|
|
value: [0.87179487 0.92105263 0.91891892 0.94444444 0.94285714 0.89189189
|
|
0.91891892 0.91891892 0.86842105 0.83783784]
|
|
|
|
mean value: 0.9035056629793472
|
|
|
|
key: train_jcc
|
|
value: [0.990625 0.98753894 0.98142415 0.98447205 0.9695122 0.98148148
|
|
0.97546012 0.97247706 0.99065421 0.98452012]
|
|
|
|
mean value: 0.981816533207571
|
|
|
|
MCC on Blind test: -0.02
|
|
|
|
Accuracy on Blind test: 0.89
|
|
|
|
Model_name: Ridge Classifier
|
|
Model func: RidgeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.03021312 0.03962302 0.04501748 0.03966117 0.04009008 0.04018998
|
|
0.03997707 0.04005098 0.04009175 0.04006386]
|
|
|
|
mean value: 0.03949785232543945
|
|
|
|
key: score_time
|
|
value: [0.01974463 0.01872849 0.01866293 0.01871657 0.01866937 0.01861072
|
|
0.01875806 0.01867962 0.01865005 0.01877975]
|
|
|
|
mean value: 0.018800020217895508
|
|
|
|
key: test_mcc
|
|
value: [0.94365079 0.94511009 1. 0.91587302 0.9451949 0.91580648
|
|
0.860309 0.94440028 0.94440028 0.82857143]
|
|
|
|
mean value: 0.9243316274891484
|
|
|
|
key: train_mcc
|
|
value: [0.95598214 0.95279902 0.95917497 0.95909722 0.95905957 0.95917295
|
|
0.96228318 0.95599375 0.95287259 0.96234029]
|
|
|
|
mean value: 0.95787756988014
|
|
|
|
key: test_accuracy
|
|
value: [0.97183099 0.97183099 1. 0.95774648 0.97183099 0.95774648
|
|
0.92857143 0.97142857 0.97142857 0.91428571]
|
|
|
|
mean value: 0.9616700201207243
|
|
|
|
key: train_accuracy
|
|
value: [0.97795276 0.97637795 0.97952756 0.97952756 0.97952756 0.97952756
|
|
0.98113208 0.97798742 0.97641509 0.98113208]
|
|
|
|
mean value: 0.9789107611548556
|
|
|
|
key: test_fscore
|
|
value: [0.97222222 0.97297297 1. 0.95774648 0.97222222 0.95652174
|
|
0.93150685 0.97222222 0.97222222 0.91428571]
|
|
|
|
mean value: 0.9621922643466319
|
|
|
|
key: train_fscore
|
|
value: [0.97805643 0.97645212 0.97965571 0.97965571 0.97959184 0.97971919
|
|
0.98119122 0.97805643 0.97652582 0.98125 ]
|
|
|
|
mean value: 0.9790154465743017
|
|
|
|
key: test_precision
|
|
value: [0.97222222 0.94736842 1. 0.94444444 0.94594595 0.97058824
|
|
0.89473684 0.94594595 0.94594595 0.91428571]
|
|
|
|
mean value: 0.9481483717242231
|
|
|
|
key: train_precision
|
|
value: [0.97196262 0.971875 0.97204969 0.97507788 0.97805643 0.97213622
|
|
0.978125 0.975 0.97196262 0.97515528]
|
|
|
|
mean value: 0.9741400733451402
|
|
|
|
key: test_recall
|
|
value: [0.97222222 1. 1. 0.97142857 1. 0.94285714
|
|
0.97142857 1. 1. 0.91428571]
|
|
|
|
mean value: 0.9772222222222222
|
|
|
|
key: train_recall
|
|
value: [0.98422713 0.98107256 0.9873817 0.98427673 0.98113208 0.98742138
|
|
0.98427673 0.98113208 0.98113208 0.98742138]
|
|
|
|
mean value: 0.9839473840842807
|
|
|
|
key: test_roc_auc
|
|
value: [0.9718254 0.97142857 1. 0.95793651 0.97222222 0.95753968
|
|
0.92857143 0.97142857 0.97142857 0.91428571]
|
|
|
|
mean value: 0.9616666666666667
|
|
|
|
key: train_roc_auc
|
|
value: [0.97796262 0.97638533 0.97953991 0.97952007 0.97952503 0.97951511
|
|
0.98113208 0.97798742 0.97641509 0.98113208]
|
|
|
|
mean value: 0.9789114735234015
|
|
|
|
key: test_jcc
|
|
value: [0.94594595 0.94736842 1. 0.91891892 0.94594595 0.91666667
|
|
0.87179487 0.94594595 0.94594595 0.84210526]
|
|
|
|
mean value: 0.9280637925374767
|
|
|
|
key: train_jcc
|
|
value: [0.95705521 0.95398773 0.9601227 0.9601227 0.96 0.96024465
|
|
0.96307692 0.95705521 0.95412844 0.96319018]
|
|
|
|
mean value: 0.9588983754093227
|
|
|
|
MCC on Blind test: 0.28
|
|
|
|
Accuracy on Blind test: 0.85
|
|
|
|
Model_name: Ridge ClassifierCV
|
|
Model func: RidgeClassifierCV(cv=10)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifierCV(cv=10))])
|
|
|
|
key: fit_time
|
|
value: [0.25199175 0.30752754 0.30353451 0.31783128 0.38609529 0.30748248
|
|
0.30769587 0.31216884 0.31785393 0.30652618]
|
|
|
|
mean value: 0.31187076568603517
|
|
|
|
key: score_time
|
|
value: [0.01877356 0.01874542 0.01878929 0.01904488 0.0187614 0.01877642
|
|
0.01883507 0.0187788 0.01881099 0.01871586]
|
|
|
|
mean value: 0.018803167343139648
|
|
|
|
key: test_mcc
|
|
value: [0.94365079 0.94511009 1. 0.91587302 0.9451949 0.8594125
|
|
0.860309 0.94440028 0.94440028 0.82857143]
|
|
|
|
mean value: 0.9186922293639426
|
|
|
|
key: train_mcc
|
|
value: [0.95598214 0.95279902 0.95917497 0.97177328 0.95905957 0.95607483
|
|
0.96228318 0.95599375 0.95287259 0.96234029]
|
|
|
|
mean value: 0.9588353639872902
|
|
|
|
key: test_accuracy
|
|
value: [0.97183099 0.97183099 1. 0.95774648 0.97183099 0.92957746
|
|
0.92857143 0.97142857 0.97142857 0.91428571]
|
|
|
|
mean value: 0.9588531187122736
|
|
|
|
key: train_accuracy
|
|
value: [0.97795276 0.97637795 0.97952756 0.98582677 0.97952756 0.97795276
|
|
0.98113208 0.97798742 0.97641509 0.98113208]
|
|
|
|
mean value: 0.9793832020997375
|
|
|
|
key: test_fscore
|
|
value: [0.97222222 0.97297297 1. 0.95774648 0.97222222 0.92753623
|
|
0.93150685 0.97222222 0.97222222 0.91428571]
|
|
|
|
mean value: 0.9592937136219942
|
|
|
|
key: train_fscore
|
|
value: /home/tanu/git/LSHTM_analysis/scripts/ml/./embb_orig.py:135: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
smnc_CT.sort_values(by = ['test_mcc'], ascending = False, inplace = True)
|
|
/home/tanu/git/LSHTM_analysis/scripts/ml/./embb_orig.py:138: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
smnc_BT.sort_values(by = ['bts_mcc'], ascending = False, inplace = True)
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[0.97805643 0.97645212 0.97965571 0.98595944 0.97959184 0.97819315
|
|
0.98119122 0.97805643 0.97652582 0.98125 ]
|
|
|
|
mean value: 0.9794932149720369
|
|
|
|
key: test_precision
|
|
value: [0.97222222 0.94736842 1. 0.94444444 0.94594595 0.94117647
|
|
0.89473684 0.94594595 0.94594595 0.91428571]
|
|
|
|
mean value: 0.9452071952536348
|
|
|
|
key: train_precision
|
|
value: [0.97196262 0.971875 0.97204969 0.97832817 0.97805643 0.9691358
|
|
0.978125 0.975 0.97196262 0.97515528]
|
|
|
|
mean value: 0.9741650604764996
|
|
|
|
key: test_recall
|
|
value: [0.97222222 1. 1. 0.97142857 1. 0.91428571
|
|
0.97142857 1. 1. 0.91428571]
|
|
|
|
mean value: 0.9743650793650793
|
|
|
|
key: train_recall
|
|
value: [0.98422713 0.98107256 0.9873817 0.99371069 0.98113208 0.98742138
|
|
0.98427673 0.98113208 0.98113208 0.98742138]
|
|
|
|
mean value: 0.9848907803106958
|
|
|
|
key: test_roc_auc
|
|
value: [0.9718254 0.97142857 1. 0.95793651 0.97222222 0.92936508
|
|
0.92857143 0.97142857 0.97142857 0.91428571]
|
|
|
|
mean value: 0.9588492063492063
|
|
|
|
key: train_roc_auc
|
|
value: [0.97796262 0.97638533 0.97953991 0.98581434 0.97952503 0.97793782
|
|
0.98113208 0.97798742 0.97641509 0.98113208]
|
|
|
|
mean value: 0.9793831716366089
|
|
|
|
key: test_jcc
|
|
value: [0.94594595 0.94736842 1. 0.91891892 0.94594595 0.86486486
|
|
0.87179487 0.94594595 0.94594595 0.84210526]
|
|
|
|
mean value: 0.9228836123572965
|
|
|
|
key: train_jcc
|
|
value: [0.95705521 0.95398773 0.9601227 0.97230769 0.96 0.95731707
|
|
0.96307692 0.95705521 0.95412844 0.96319018]
|
|
|
|
mean value: 0.9598241171867105
|
|
|
|
MCC on Blind test: 0.27
|
|
|
|
Accuracy on Blind test: 0.82
|
|
|
|
Model_name: Logistic Regression
|
|
Model func: LogisticRegression(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegression(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.03594255 0.03948259 0.03835654 0.0386765 0.03851318 0.03880954
|
|
0.03893447 0.0384903 0.03879142 0.04603362]
|
|
|
|
mean value: 0.03920307159423828
|
|
|
|
key: score_time
|
|
value: [0.01199222 0.01490903 0.01501274 0.01509595 0.01472116 0.01482964
|
|
0.0148356 0.0149467 0.01498246 0.01514578]
|
|
|
|
mean value: 0.014647126197814941
|
|
|
|
key: test_mcc
|
|
value: [0.8365327 0.94511009 0.94511009 0.77601295 0.88880092 0.88862624
|
|
0.8340361 0.91465912 0.82857143 0.80295507]
|
|
|
|
mean value: 0.8660414704445305
|
|
|
|
key: train_mcc
|
|
value: [0.915 0.91818128 0.91209691 0.92440926 0.92448413 0.88987659
|
|
0.91211206 0.92460145 0.9028774 0.90902529]
|
|
|
|
mean value: 0.9132664368918785
|
|
|
|
key: test_accuracy
|
|
value: [0.91549296 0.97183099 0.97183099 0.88732394 0.94366197 0.94366197
|
|
0.91428571 0.95714286 0.91428571 0.9 ]
|
|
|
|
mean value: 0.9319517102615694
|
|
|
|
key: train_accuracy
|
|
value: [0.95748031 0.95905512 0.95590551 0.96220472 0.96220472 0.94488189
|
|
0.95597484 0.96226415 0.95125786 0.95440252]
|
|
|
|
mean value: 0.9565631654533749
|
|
|
|
key: test_fscore
|
|
value: [0.91176471 0.97297297 0.97297297 0.88888889 0.94444444 0.94117647
|
|
0.91891892 0.95774648 0.91428571 0.89552239]
|
|
|
|
mean value: 0.9318693955887442
|
|
|
|
key: train_fscore
|
|
value: [0.95721078 0.95873016 0.95527157 0.96226415 0.96202532 0.94453249
|
|
0.95555556 0.96202532 0.95055821 0.95389507]
|
|
|
|
mean value: 0.9562068613553221
|
|
|
|
key: test_precision
|
|
value: [0.96875 0.94736842 0.94736842 0.86486486 0.91891892 0.96969697
|
|
0.87179487 0.94444444 0.91428571 0.9375 ]
|
|
|
|
mean value: 0.9284992626111047
|
|
|
|
key: train_precision
|
|
value: [0.96178344 0.96485623 0.96763754 0.96226415 0.96815287 0.95207668
|
|
0.96474359 0.96815287 0.96440129 0.96463023]
|
|
|
|
mean value: 0.9638698880041593
|
|
|
|
key: test_recall
|
|
value: [0.86111111 1. 1. 0.91428571 0.97142857 0.91428571
|
|
0.97142857 0.97142857 0.91428571 0.85714286]
|
|
|
|
mean value: 0.9375396825396826
|
|
|
|
key: train_recall
|
|
value: [0.95268139 0.95268139 0.94321767 0.96226415 0.95597484 0.93710692
|
|
0.94654088 0.95597484 0.93710692 0.94339623]
|
|
|
|
mean value: 0.9486945221514592
|
|
|
|
key: test_roc_auc
|
|
value: [0.91626984 0.97142857 0.97142857 0.88769841 0.94404762 0.94325397
|
|
0.91428571 0.95714286 0.91428571 0.9 ]
|
|
|
|
mean value: 0.9319841269841269
|
|
|
|
key: train_roc_auc
|
|
value: [0.95747277 0.9590451 0.95588556 0.96220463 0.96221455 0.94489415
|
|
0.95597484 0.96226415 0.95125786 0.95440252]
|
|
|
|
mean value: 0.956561613396028
|
|
|
|
key: test_jcc
|
|
value: [0.83783784 0.94736842 0.94736842 0.8 0.89473684 0.88888889
|
|
0.85 0.91891892 0.84210526 0.81081081]
|
|
|
|
mean value: 0.8738035403824878
|
|
|
|
key: train_jcc
|
|
value: [0.91793313 0.92073171 0.91437309 0.92727273 0.92682927 0.89489489
|
|
0.91489362 0.92682927 0.90577508 0.9118541 ]
|
|
|
|
mean value: 0.9161386881806748
|
|
|
|
MCC on Blind test: 0.29
|
|
|
|
Accuracy on Blind test: 0.8
|
|
|
|
Model_name: Logistic RegressionCV
|
|
Model func: LogisticRegressionCV(random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegressionCV(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.93982506 1.07793832 0.8825326 1.00127554 0.89867663 0.97309875
|
|
0.89683342 0.93667412 1.0593009 0.9926424 ]
|
|
|
|
mean value: 0.9658797740936279
|
|
|
|
key: score_time
|
|
value: [0.0186491 0.01506066 0.01521134 0.01231885 0.01510096 0.01711798
|
|
0.015131 0.02322721 0.01519895 0.01535654]
|
|
|
|
mean value: 0.016237258911132812
|
|
|
|
key: test_mcc
|
|
value: [0.94365079 0.9186708 0.94511009 0.86237318 0.9451949 0.97220047
|
|
0.94440028 0.94440028 0.94440028 0.91465912]
|
|
|
|
mean value: 0.9335060194868096
|
|
|
|
key: train_mcc
|
|
value: [0.98742126 0.9842961 0.98118056 0.99059524 0.98117981 0.97799603
|
|
0.98432053 0.9812097 0.99061012 0.97810833]
|
|
|
|
mean value: 0.9836917695389659
|
|
|
|
key: test_accuracy
|
|
value: [0.97183099 0.95774648 0.97183099 0.92957746 0.97183099 0.98591549
|
|
0.97142857 0.97142857 0.97142857 0.95714286]
|
|
|
|
mean value: 0.9660160965794768
|
|
|
|
key: train_accuracy
|
|
value: [0.99370079 0.99212598 0.99055118 0.99527559 0.99055118 0.98897638
|
|
0.99213836 0.99056604 0.99528302 0.98899371]
|
|
|
|
mean value: 0.9918162234437676
|
|
|
|
key: test_fscore
|
|
value: [0.97222222 0.96 0.97297297 0.93150685 0.97222222 0.98550725
|
|
0.97222222 0.97222222 0.97222222 0.95652174]
|
|
|
|
mean value: 0.9667619918906399
|
|
|
|
key: train_fscore
|
|
value: [0.99371069 0.99215071 0.99059561 0.99530516 0.990625 0.98904538
|
|
0.99217527 0.990625 0.99530516 0.98907956]
|
|
|
|
mean value: 0.9918617558643608
|
|
|
|
key: test_precision
|
|
value: [0.97222222 0.92307692 0.94736842 0.89473684 0.94594595 1.
|
|
0.94594595 0.94594595 0.94594595 0.97058824]
|
|
|
|
mean value: 0.9491776427534941
|
|
|
|
key: train_precision
|
|
value: [0.99059561 0.9875 0.98442368 0.99065421 0.98447205 0.98442368
|
|
0.98753894 0.98447205 0.99065421 0.98142415]
|
|
|
|
mean value: 0.9866158563320804
|
|
|
|
key: test_recall
|
|
value: [0.97222222 1. 1. 0.97142857 1. 0.97142857
|
|
1. 1. 1. 0.94285714]
|
|
|
|
mean value: 0.9857936507936508
|
|
|
|
key: train_recall
|
|
value: [0.99684543 0.99684543 0.99684543 1. 0.99685535 0.99371069
|
|
0.99685535 0.99685535 1. 0.99685535]
|
|
|
|
mean value: 0.9971668353074222
|
|
|
|
key: test_roc_auc
|
|
value: [0.9718254 0.95714286 0.97142857 0.93015873 0.97222222 0.98571429
|
|
0.97142857 0.97142857 0.97142857 0.95714286]
|
|
|
|
mean value: 0.9659920634920635
|
|
|
|
key: train_roc_auc
|
|
value: [0.99370573 0.9921334 0.99056108 0.99526814 0.99054124 0.98896891
|
|
0.99213836 0.99056604 0.99528302 0.98899371]
|
|
|
|
mean value: 0.9918159633355158
|
|
|
|
key: test_jcc
|
|
value: [0.94594595 0.92307692 0.94736842 0.87179487 0.94594595 0.97142857
|
|
0.94594595 0.94594595 0.94594595 0.91666667]
|
|
|
|
mean value: 0.9360065183749394
|
|
|
|
key: train_jcc
|
|
value: [0.9875 0.98442368 0.98136646 0.99065421 0.98142415 0.97832817
|
|
0.98447205 0.98142415 0.99065421 0.97839506]
|
|
|
|
mean value: 0.9838642128860815
|
|
|
|
MCC on Blind test: 0.28
|
|
|
|
Accuracy on Blind test: 0.8
|
|
|
|
Model_name: Gaussian NB
|
|
Model func: GaussianNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianNB())])
|
|
|
|
key: fit_time
|
|
value: [0.0152359 0.01094818 0.01123309 0.01073027 0.01112604 0.01132703
|
|
0.0119102 0.01150513 0.01191497 0.01137686]
|
|
|
|
mean value: 0.011730766296386719
|
|
|
|
key: score_time
|
|
value: [0.01216507 0.00952792 0.01003885 0.00955248 0.00957537 0.01010394
|
|
0.00979304 0.00971627 0.00998306 0.00934339]
|
|
|
|
mean value: 0.009979939460754395
|
|
|
|
key: test_mcc
|
|
value: [0.57777778 0.52142857 0.75442414 0.57777778 0.83095238 0.61348603
|
|
0.57735027 0.57166195 0.55101405 0.51961524]
|
|
|
|
mean value: 0.6095488189953596
|
|
|
|
key: train_mcc
|
|
value: [0.71463911 0.59260819 0.67837961 0.72704944 0.71076053 0.72681542
|
|
0.69293487 0.66629844 0.68749537 0.76753852]
|
|
|
|
mean value: 0.6964519480059796
|
|
|
|
key: test_accuracy
|
|
value: [0.78873239 0.76056338 0.87323944 0.78873239 0.91549296 0.8028169
|
|
0.78571429 0.78571429 0.77142857 0.75714286]
|
|
|
|
mean value: 0.8029577464788732
|
|
|
|
key: train_accuracy
|
|
value: [0.85669291 0.78582677 0.83779528 0.86299213 0.85511811 0.86299213
|
|
0.84591195 0.83176101 0.8427673 0.8836478 ]
|
|
|
|
mean value: 0.8465505373149111
|
|
|
|
key: test_fscore
|
|
value: [0.78873239 0.76056338 0.86567164 0.78873239 0.91428571 0.78125
|
|
0.76923077 0.7826087 0.75 0.73846154]
|
|
|
|
mean value: 0.7939536528435325
|
|
|
|
key: train_fscore
|
|
value: [0.85203252 0.75272727 0.82975207 0.85945073 0.8525641 0.85990338
|
|
0.84142395 0.82372323 0.83660131 0.88216561]
|
|
|
|
mean value: 0.8390344159853997
|
|
|
|
key: test_precision
|
|
value: [0.8 0.77142857 0.93548387 0.77777778 0.91428571 0.86206897
|
|
0.83333333 0.79411765 0.82758621 0.8 ]
|
|
|
|
mean value: 0.8316082087265755
|
|
|
|
key: train_precision
|
|
value: [0.87919463 0.88841202 0.87152778 0.88372093 0.86928105 0.88118812
|
|
0.86666667 0.8650519 0.8707483 0.89354839]
|
|
|
|
mean value: 0.8769339776811071
|
|
|
|
key: test_recall
|
|
value: [0.77777778 0.75 0.80555556 0.8 0.91428571 0.71428571
|
|
0.71428571 0.77142857 0.68571429 0.68571429]
|
|
|
|
mean value: 0.7619047619047619
|
|
|
|
key: train_recall
|
|
value: [0.82649842 0.65299685 0.79179811 0.83647799 0.83647799 0.83962264
|
|
0.81761006 0.78616352 0.80503145 0.87106918]
|
|
|
|
mean value: 0.8063746205583001
|
|
|
|
key: test_roc_auc
|
|
value: [0.78888889 0.76071429 0.87420635 0.78888889 0.91547619 0.8015873
|
|
0.78571429 0.78571429 0.77142857 0.75714286]
|
|
|
|
mean value: 0.8029761904761905
|
|
|
|
key: train_roc_auc
|
|
value: [0.85664544 0.78561792 0.83772295 0.86303395 0.85514751 0.86302899
|
|
0.84591195 0.83176101 0.8427673 0.8836478 ]
|
|
|
|
mean value: 0.8465284804475924
|
|
|
|
key: test_jcc
|
|
value: [0.65116279 0.61363636 0.76315789 0.65116279 0.84210526 0.64102564
|
|
0.625 0.64285714 0.6 0.58536585]
|
|
|
|
mean value: 0.661547374046777
|
|
|
|
key: train_jcc
|
|
value: [0.74220963 0.60349854 0.70903955 0.75354108 0.74301676 0.75423729
|
|
0.72625698 0.70028011 0.71910112 0.78917379]
|
|
|
|
mean value: 0.7240354854478415
|
|
|
|
MCC on Blind test: 0.25
|
|
|
|
Accuracy on Blind test: 0.77
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01209974 0.01110792 0.01105142 0.01138878 0.01106238 0.01141858
|
|
0.01170826 0.01146555 0.01103926 0.01132655]
|
|
|
|
mean value: 0.011366844177246094
|
|
|
|
key: score_time
|
|
value: [0.00964522 0.00992823 0.00909424 0.00903535 0.00904679 0.008991
|
|
0.00982594 0.00964546 0.00908184 0.00939918]
|
|
|
|
mean value: 0.009369325637817384
|
|
|
|
key: test_mcc
|
|
value: [0.43675542 0.69047619 0.63383658 0.57777778 0.72937021 0.60555556
|
|
0.42857143 0.58321184 0.48650924 0.54643744]
|
|
|
|
mean value: 0.5718501681811895
|
|
|
|
key: train_mcc
|
|
value: [0.63874102 0.60335591 0.61928923 0.61308668 0.6193916 0.61325352
|
|
0.63907615 0.61713385 0.61646194 0.62924202]
|
|
|
|
mean value: 0.6209031925514391
|
|
|
|
key: test_accuracy
|
|
value: [0.71830986 0.84507042 0.81690141 0.78873239 0.85915493 0.8028169
|
|
0.71428571 0.78571429 0.74285714 0.77142857]
|
|
|
|
mean value: 0.7845271629778672
|
|
|
|
key: train_accuracy
|
|
value: [0.81889764 0.8015748 0.80944882 0.80629921 0.80944882 0.80629921
|
|
0.81918239 0.8081761 0.8081761 0.81446541]
|
|
|
|
mean value: 0.8101968503937008
|
|
|
|
key: test_fscore
|
|
value: [0.72972973 0.84507042 0.82191781 0.78873239 0.86842105 0.8
|
|
0.71428571 0.80519481 0.75 0.75757576]
|
|
|
|
mean value: 0.7880927684538173
|
|
|
|
key: train_fscore
|
|
value: [0.82334869 0.80373832 0.8124031 0.81047766 0.81355932 0.81105991
|
|
0.82334869 0.81288344 0.80634921 0.81733746]
|
|
|
|
mean value: 0.8134505798200699
|
|
|
|
key: test_precision
|
|
value: [0.71052632 0.85714286 0.81081081 0.77777778 0.80487805 0.8
|
|
0.71428571 0.73809524 0.72972973 0.80645161]
|
|
|
|
mean value: 0.7749698105315315
|
|
|
|
key: train_precision
|
|
value: [0.80239521 0.79384615 0.79878049 0.79456193 0.79758308 0.79279279
|
|
0.8048048 0.79341317 0.81410256 0.80487805]
|
|
|
|
mean value: 0.7997158250470955
|
|
|
|
key: test_recall
|
|
value: [0.75 0.83333333 0.83333333 0.8 0.94285714 0.8
|
|
0.71428571 0.88571429 0.77142857 0.71428571]
|
|
|
|
mean value: 0.8045238095238095
|
|
|
|
key: train_recall
|
|
value: [0.84542587 0.81388013 0.82649842 0.82704403 0.83018868 0.83018868
|
|
0.8427673 0.83333333 0.79874214 0.83018868]
|
|
|
|
mean value: 0.8278257246592465
|
|
|
|
key: test_roc_auc
|
|
value: [0.71785714 0.8452381 0.81666667 0.78888889 0.86031746 0.80277778
|
|
0.71428571 0.78571429 0.74285714 0.77142857]
|
|
|
|
mean value: 0.7846031746031746
|
|
|
|
key: train_roc_auc
|
|
value: [0.81893935 0.80159415 0.80947563 0.80626649 0.80941611 0.80626153
|
|
0.81918239 0.8081761 0.8081761 0.81446541]
|
|
|
|
mean value: 0.810195325675059
|
|
|
|
key: test_jcc
|
|
value: [0.57446809 0.73170732 0.69767442 0.65116279 0.76744186 0.66666667
|
|
0.55555556 0.67391304 0.6 0.6097561 ]
|
|
|
|
mean value: 0.6528345835208454
|
|
|
|
key: train_jcc
|
|
value: [0.6997389 0.671875 0.68407311 0.68134715 0.68571429 0.68217054
|
|
0.6997389 0.68475452 0.67553191 0.69109948]
|
|
|
|
mean value: 0.6856043805744364
|
|
|
|
MCC on Blind test: 0.22
|
|
|
|
Accuracy on Blind test: 0.69
|
|
|
|
Model_name: K-Nearest Neighbors
|
|
Model func: KNeighborsClassifier()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', KNeighborsClassifier())])
|
|
|
|
key: fit_time
|
|
value: [0.01445675 0.01169515 0.01169848 0.01117826 0.01003003 0.01143575
|
|
0.01147628 0.01169276 0.01095033 0.01098323]
|
|
|
|
mean value: 0.011559700965881348
|
|
|
|
key: score_time
|
|
value: [0.03864765 0.01363873 0.01281166 0.01314807 0.01347899 0.01384521
|
|
0.01404572 0.01386166 0.01362133 0.0188024 ]
|
|
|
|
mean value: 0.016590142250061037
|
|
|
|
key: test_mcc
|
|
value: [0.77565853 0.63383658 0.69643609 0.63643777 0.54920635 0.6656213
|
|
0.60395717 0.6350853 0.53218116 0.60395717]
|
|
|
|
mean value: 0.6332377423740981
|
|
|
|
key: train_mcc
|
|
value: [0.77250459 0.77250459 0.78646662 0.78288962 0.77474422 0.77541985
|
|
0.77412737 0.78676618 0.79103491 0.77343873]
|
|
|
|
mean value: 0.7789896676549471
|
|
|
|
key: test_accuracy
|
|
value: [0.88732394 0.81690141 0.84507042 0.81690141 0.77464789 0.83098592
|
|
0.8 0.81428571 0.75714286 0.8 ]
|
|
|
|
mean value: 0.8143259557344065
|
|
|
|
key: train_accuracy
|
|
value: [0.88503937 0.88503937 0.89133858 0.88818898 0.88661417 0.88661417
|
|
0.88522013 0.89150943 0.89465409 0.8836478 ]
|
|
|
|
mean value: 0.8877866092210172
|
|
|
|
key: test_fscore
|
|
value: [0.89189189 0.82191781 0.85714286 0.82191781 0.77142857 0.81818182
|
|
0.81081081 0.82666667 0.78481013 0.81081081]
|
|
|
|
mean value: 0.8215579169954061
|
|
|
|
key: train_fscore
|
|
value: [0.8892261 0.8892261 0.8962406 0.89512555 0.8902439 0.89090909
|
|
0.89055472 0.89655172 0.89802131 0.89053254]
|
|
|
|
mean value: 0.8926631649205217
|
|
|
|
key: test_precision
|
|
value: [0.86842105 0.81081081 0.80487805 0.78947368 0.77142857 0.87096774
|
|
0.76923077 0.775 0.70454545 0.76923077]
|
|
|
|
mean value: 0.7933986902804453
|
|
|
|
key: train_precision
|
|
value: [0.85672515 0.85672515 0.85632184 0.84401114 0.86390533 0.85964912
|
|
0.85100287 0.85673352 0.87020649 0.84078212]
|
|
|
|
mean value: 0.8556062724055564
|
|
|
|
key: test_recall
|
|
value: [0.91666667 0.83333333 0.91666667 0.85714286 0.77142857 0.77142857
|
|
0.85714286 0.88571429 0.88571429 0.85714286]
|
|
|
|
mean value: 0.8552380952380952
|
|
|
|
key: train_recall
|
|
value: [0.92429022 0.92429022 0.94006309 0.95283019 0.91823899 0.9245283
|
|
0.93396226 0.94025157 0.92767296 0.94654088]
|
|
|
|
mean value: 0.9332668690355733
|
|
|
|
key: test_roc_auc
|
|
value: [0.88690476 0.81666667 0.84404762 0.81746032 0.77460317 0.83015873
|
|
0.8 0.81428571 0.75714286 0.8 ]
|
|
|
|
mean value: 0.8141269841269841
|
|
|
|
key: train_roc_auc
|
|
value: [0.88510109 0.88510109 0.89141519 0.88808702 0.88656429 0.88655437
|
|
0.88522013 0.89150943 0.89465409 0.8836478 ]
|
|
|
|
mean value: 0.8877854492788129
|
|
|
|
key: test_jcc
|
|
value: [0.80487805 0.69767442 0.75 0.69767442 0.62790698 0.69230769
|
|
0.68181818 0.70454545 0.64583333 0.68181818]
|
|
|
|
mean value: 0.698445670655682
|
|
|
|
key: train_jcc
|
|
value: [0.80054645 0.80054645 0.8119891 0.81016043 0.8021978 0.80327869
|
|
0.8027027 0.8125 0.81491713 0.80266667]
|
|
|
|
mean value: 0.8061505411963373
|
|
|
|
MCC on Blind test: 0.22
|
|
|
|
Accuracy on Blind test: 0.71
|
|
|
|
Model_name: SVM
|
|
Model func: SVC(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SVC(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.03113008 0.0284884 0.02862453 0.02894711 0.028687 0.02866578
|
|
0.02847266 0.02867603 0.03194046 0.02795339]
|
|
|
|
mean value: 0.029158544540405274
|
|
|
|
key: score_time
|
|
value: [0.01377654 0.01377273 0.01601887 0.01397324 0.01376891 0.01377559
|
|
0.01362824 0.01372671 0.01443839 0.01362658]
|
|
|
|
mean value: 0.014050579071044922
|
|
|
|
key: test_mcc
|
|
value: [0.77460317 0.88862624 0.83214239 0.69047619 0.77460317 0.83095238
|
|
0.77269114 0.80829038 0.68599434 0.71545476]
|
|
|
|
mean value: 0.7773834160282649
|
|
|
|
key: train_mcc
|
|
value: [0.85511775 0.8614881 0.83636456 0.83006402 0.83622007 0.83937305
|
|
0.8585967 0.83333745 0.86512643 0.8459496 ]
|
|
|
|
mean value: 0.8461637711623139
|
|
|
|
key: test_accuracy
|
|
value: [0.88732394 0.94366197 0.91549296 0.84507042 0.88732394 0.91549296
|
|
0.88571429 0.9 0.84285714 0.85714286]
|
|
|
|
mean value: 0.8880080482897384
|
|
|
|
key: train_accuracy
|
|
value: [0.92755906 0.93070866 0.91811024 0.91496063 0.91811024 0.91968504
|
|
0.92924528 0.91666667 0.93238994 0.92295597]
|
|
|
|
mean value: 0.9230391719902936
|
|
|
|
key: test_fscore
|
|
value: [0.88888889 0.94594595 0.91891892 0.84507042 0.88571429 0.91428571
|
|
0.88888889 0.90666667 0.84507042 0.85294118]
|
|
|
|
mean value: 0.889239133085032
|
|
|
|
key: train_fscore
|
|
value: [0.92744479 0.93103448 0.91719745 0.91588785 0.91823899 0.91993721
|
|
0.92979719 0.91653543 0.93333333 0.92331768]
|
|
|
|
mean value: 0.9232724421943014
|
|
|
|
key: test_precision
|
|
value: [0.88888889 0.92105263 0.89473684 0.83333333 0.88571429 0.91428571
|
|
0.86486486 0.85 0.83333333 0.87878788]
|
|
|
|
mean value: 0.876499777289251
|
|
|
|
key: train_precision
|
|
value: [0.92744479 0.92523364 0.92604502 0.90740741 0.91823899 0.9184953
|
|
0.92260062 0.91798107 0.9204893 0.91900312]
|
|
|
|
mean value: 0.9202939258464541
|
|
|
|
key: test_recall
|
|
value: [0.88888889 0.97222222 0.94444444 0.85714286 0.88571429 0.91428571
|
|
0.91428571 0.97142857 0.85714286 0.82857143]
|
|
|
|
mean value: 0.9034126984126984
|
|
|
|
key: train_recall
|
|
value: [0.92744479 0.93690852 0.90851735 0.9245283 0.91823899 0.92138365
|
|
0.93710692 0.91509434 0.94654088 0.92767296]
|
|
|
|
mean value: 0.9263436700196417
|
|
|
|
key: test_roc_auc
|
|
value: [0.88730159 0.94325397 0.91507937 0.8452381 0.88730159 0.91547619
|
|
0.88571429 0.9 0.84285714 0.85714286]
|
|
|
|
mean value: 0.8879365079365079
|
|
|
|
key: train_roc_auc
|
|
value: [0.92755888 0.93071841 0.91809515 0.91494554 0.91811003 0.91968236
|
|
0.92924528 0.91666667 0.93238994 0.92295597]
|
|
|
|
mean value: 0.9230368232049679
|
|
|
|
key: test_jcc
|
|
value: [0.8 0.8974359 0.85 0.73170732 0.79487179 0.84210526
|
|
0.8 0.82926829 0.73170732 0.74358974]
|
|
|
|
mean value: 0.8020685625884599
|
|
|
|
key: train_jcc
|
|
value: [0.86470588 0.87096774 0.84705882 0.84482759 0.84883721 0.85174419
|
|
0.86880466 0.84593023 0.875 0.85755814]
|
|
|
|
mean value: 0.8575434466189625
|
|
|
|
MCC on Blind test: 0.36
|
|
|
|
Accuracy on Blind test: 0.85
|
|
|
|
Model_name: MLP
|
|
Model func: MLPClassifier(max_iter=500, random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MLPClassifier(max_iter=500, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [2.49309087 2.5709374 2.5931356 2.05507874 2.55855608 2.35582185
|
|
2.62188935 2.10693741 2.05355692 2.35530043]
|
|
|
|
mean value: 2.3764304637908937
|
|
|
|
key: score_time
|
|
value: [0.01243663 0.01508927 0.01480412 0.01255894 0.01479626 0.01233006
|
|
0.02253532 0.01255274 0.01245928 0.0125041 ]
|
|
|
|
mean value: 0.014206671714782714
|
|
|
|
key: test_mcc
|
|
value: [0.97220047 0.9186708 0.94511009 0.86237318 0.9451949 0.9451949
|
|
0.91766294 0.94440028 0.94440028 0.88571429]
|
|
|
|
mean value: 0.9280922117092306
|
|
|
|
key: train_mcc
|
|
value: [0.99685535 0.99685535 0.99685535 0.99372043 0.99685531 0.99372043
|
|
0.99686027 0.99371069 0.99371069 0.99686027]
|
|
|
|
mean value: 0.9956004136653424
|
|
|
|
key: test_accuracy
|
|
value: [0.98591549 0.95774648 0.97183099 0.92957746 0.97183099 0.97183099
|
|
0.95714286 0.97142857 0.97142857 0.94285714]
|
|
|
|
mean value: 0.963158953722334
|
|
|
|
key: train_accuracy
|
|
value: [0.9984252 0.9984252 0.9984252 0.99685039 0.9984252 0.99685039
|
|
0.99842767 0.99685535 0.99685535 0.99842767]
|
|
|
|
mean value: 0.9977967612538998
|
|
|
|
key: test_fscore
|
|
value: [0.98630137 0.96 0.97297297 0.93150685 0.97222222 0.97222222
|
|
0.95890411 0.97222222 0.97222222 0.94285714]
|
|
|
|
mean value: 0.9641431333486128
|
|
|
|
key: train_fscore
|
|
value: [0.9984252 0.9984252 0.9984252 0.9968652 0.99843014 0.9968652
|
|
0.99843014 0.99685535 0.99685535 0.99843014]
|
|
|
|
mean value: 0.9978007113760444
|
|
|
|
key: test_precision
|
|
value: [0.97297297 0.92307692 0.94736842 0.89473684 0.94594595 0.94594595
|
|
0.92105263 0.94594595 0.94594595 0.94285714]
|
|
|
|
mean value: 0.9385848717427665
|
|
|
|
key: train_precision
|
|
value: [0.99685535 0.99685535 0.99685535 0.99375 0.9968652 0.99375
|
|
0.9968652 0.99685535 0.99685535 0.9968652 ]
|
|
|
|
mean value: 0.9962372340845015
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 0.97142857 1. 1.
|
|
1. 1. 1. 0.94285714]
|
|
|
|
mean value: 0.9914285714285714
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1.
|
|
1. 0.99685535 0.99685535 1. ]
|
|
|
|
mean value: 0.9993710691823899
|
|
|
|
key: test_roc_auc
|
|
value: [0.98571429 0.95714286 0.97142857 0.93015873 0.97222222 0.97222222
|
|
0.95714286 0.97142857 0.97142857 0.94285714]
|
|
|
|
mean value: 0.9631746031746031
|
|
|
|
key: train_roc_auc
|
|
value: [0.99842767 0.99842767 0.99842767 0.99684543 0.99842271 0.99684543
|
|
0.99842767 0.99685535 0.99685535 0.99842767]
|
|
|
|
mean value: 0.9977962621272544
|
|
|
|
key: test_jcc
|
|
value: [0.97297297 0.92307692 0.94736842 0.87179487 0.94594595 0.94594595
|
|
0.92105263 0.94594595 0.94594595 0.89189189]
|
|
|
|
mean value: 0.9311941496152023
|
|
|
|
key: train_jcc
|
|
value: [0.99685535 0.99685535 0.99685535 0.99375 0.9968652 0.99375
|
|
0.9968652 0.99373041 0.99373041 0.9968652 ]
|
|
|
|
mean value: 0.9956122464068138
|
|
|
|
MCC on Blind test: 0.3
|
|
|
|
Accuracy on Blind test: 0.83
|
|
|
|
Model_name: Decision Tree
|
|
Model func: DecisionTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', DecisionTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.04619694 0.02680802 0.02502728 0.02394152 0.02268791 0.02495456
|
|
0.02386546 0.02648425 0.02523398 0.02658558]
|
|
|
|
mean value: 0.02717854976654053
|
|
|
|
key: score_time
|
|
value: [0.00964713 0.00897169 0.00894856 0.00895596 0.00889874 0.0089829
|
|
0.0089438 0.00895047 0.00891232 0.00898528]
|
|
|
|
mean value: 0.009019684791564942
|
|
|
|
key: test_mcc
|
|
value: [0.97220047 1. 0.97222222 0.91885703 0.9451949 0.97222222
|
|
0.91766294 1. 0.91766294 1. ]
|
|
|
|
mean value: 0.9616022710173856
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.98591549 1. 0.98591549 0.95774648 0.97183099 0.98591549
|
|
0.95714286 1. 0.95714286 1. ]
|
|
|
|
mean value: 0.9801609657947686
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.98630137 1. 0.98591549 0.95890411 0.97222222 0.98591549
|
|
0.95890411 1. 0.95890411 1. ]
|
|
|
|
mean value: 0.9807066906767852
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.97297297 1. 1. 0.92105263 0.94594595 0.97222222
|
|
0.92105263 1. 0.92105263 1. ]
|
|
|
|
mean value: 0.9654299035877983
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 0.97222222 1. 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9972222222222222
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.98571429 1. 0.98611111 0.95833333 0.97222222 0.98611111
|
|
0.95714286 1. 0.95714286 1. ]
|
|
|
|
mean value: 0.9802777777777778
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.97297297 1. 0.97222222 0.92105263 0.94594595 0.97222222
|
|
0.92105263 1. 0.92105263 1. ]
|
|
|
|
mean value: 0.9626521258100206
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.24
|
|
|
|
Accuracy on Blind test: 0.83
|
|
|
|
Model_name: Extra Trees
|
|
Model func: ExtraTreesClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreesClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.13009119 0.12904167 0.12866402 0.12830615 0.12942195 0.12825227
|
|
0.12984514 0.12967873 0.1306324 0.12854242]
|
|
|
|
mean value: 0.1292475938796997
|
|
|
|
key: score_time
|
|
value: [0.01796412 0.01812577 0.01803756 0.01811409 0.0183053 0.01822281
|
|
0.01817417 0.01823258 0.01810813 0.01828384]
|
|
|
|
mean value: 0.018156838417053223
|
|
|
|
key: test_mcc
|
|
value: [0.97220047 0.97220047 0.97222222 0.88730159 0.97222222 0.94511009
|
|
0.91766294 1. 0.91766294 0.91465912]
|
|
|
|
mean value: 0.9471242044785104
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.98591549 0.98591549 0.98591549 0.94366197 0.98591549 0.97183099
|
|
0.95714286 1. 0.95714286 0.95714286]
|
|
|
|
mean value: 0.9730583501006036
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.98630137 0.98630137 0.98591549 0.94285714 0.98591549 0.97058824
|
|
0.95890411 1. 0.95890411 0.95652174]
|
|
|
|
mean value: 0.9732209062101298
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.97297297 0.97297297 1. 0.94285714 0.97222222 1.
|
|
0.92105263 1. 0.92105263 0.97058824]
|
|
|
|
mean value: 0.9673718809477323
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 0.97222222 0.94285714 1. 0.94285714
|
|
1. 1. 1. 0.94285714]
|
|
|
|
mean value: 0.9800793650793651
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.98571429 0.98571429 0.98611111 0.94365079 0.98611111 0.97142857
|
|
0.95714286 1. 0.95714286 0.95714286]
|
|
|
|
mean value: 0.973015873015873
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.97297297 0.97297297 0.97222222 0.89189189 0.97222222 0.94285714
|
|
0.92105263 1. 0.92105263 0.91666667]
|
|
|
|
mean value: 0.9483911354963986
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.35
|
|
|
|
Accuracy on Blind test: 0.93
|
|
|
|
Model_name: Extra Tree
|
|
Model func: ExtraTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01092458 0.01107407 0.01101637 0.01101971 0.01122189 0.01097798
|
|
0.01097846 0.01098228 0.01109624 0.01085091]
|
|
|
|
mean value: 0.011014246940612793
|
|
|
|
key: score_time
|
|
value: [0.00895238 0.00893617 0.0090847 0.00909424 0.00898981 0.00900674
|
|
0.00902057 0.00900197 0.00905633 0.00904799]
|
|
|
|
mean value: 0.009019088745117188
|
|
|
|
key: test_mcc
|
|
value: [0.89282857 0.86753285 0.81050059 0.70470171 0.86802778 0.85952381
|
|
0.91766294 0.8660254 0.79240582 0.82992752]
|
|
|
|
mean value: 0.8409136987468386
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.94366197 0.92957746 0.90140845 0.84507042 0.92957746 0.92957746
|
|
0.95714286 0.92857143 0.88571429 0.91428571]
|
|
|
|
mean value: 0.9164587525150906
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.94736842 0.93506494 0.90909091 0.85714286 0.93333333 0.92957746
|
|
0.95890411 0.93333333 0.8974359 0.91666667]
|
|
|
|
mean value: 0.9217917927498337
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.9 0.87804878 0.85365854 0.78571429 0.875 0.91666667
|
|
0.92105263 0.875 0.81395349 0.89189189]
|
|
|
|
mean value: 0.8710986281297055
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 0.97222222 0.94285714 1. 0.94285714
|
|
1. 1. 1. 0.94285714]
|
|
|
|
mean value: 0.9800793650793651
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.94285714 0.92857143 0.90039683 0.84642857 0.93055556 0.9297619
|
|
0.95714286 0.92857143 0.88571429 0.91428571]
|
|
|
|
mean value: 0.9164285714285714
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.9 0.87804878 0.83333333 0.75 0.875 0.86842105
|
|
0.92105263 0.875 0.81395349 0.84615385]
|
|
|
|
mean value: 0.8560963132557604
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.22
|
|
|
|
Accuracy on Blind test: 0.83
|
|
|
|
Model_name: Random Forest
|
|
Model func: RandomForestClassifier(n_estimators=1000, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(n_estimators=1000, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.90585661 1.92890406 2.04147029 2.01390171 1.96589684 1.89213848
|
|
1.9139514 1.87380958 1.86450362 1.87568569]
|
|
|
|
mean value: 1.9276118278503418
|
|
|
|
key: score_time
|
|
value: [0.09954929 0.09601259 0.10409904 0.10439253 0.09576201 0.1043303
|
|
0.09512854 0.09506869 0.09564662 0.15103364]
|
|
|
|
mean value: 0.10410232543945312
|
|
|
|
key: test_mcc
|
|
value: [0.94511009 1. 0.97222222 0.88730159 0.9451949 1.
|
|
0.91766294 1. 0.91766294 0.94440028]
|
|
|
|
mean value: 0.9529554949390513
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.97183099 1. 0.98591549 0.94366197 0.97183099 1.
|
|
0.95714286 1. 0.95714286 0.97142857]
|
|
|
|
mean value: 0.9758953722334004
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.97297297 1. 0.98591549 0.94285714 0.97222222 1.
|
|
0.95890411 1. 0.95890411 0.97058824]
|
|
|
|
mean value: 0.9762364285482285
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.94736842 1. 1. 0.94285714 0.94594595 1.
|
|
0.92105263 1. 0.92105263 1. ]
|
|
|
|
mean value: 0.9678276773013615
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 0.97222222 0.94285714 1. 1.
|
|
1. 1. 1. 0.94285714]
|
|
|
|
mean value: 0.9857936507936508
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.97142857 1. 0.98611111 0.94365079 0.97222222 1.
|
|
0.95714286 1. 0.95714286 0.97142857]
|
|
|
|
mean value: 0.9759126984126985
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.94736842 1. 0.97222222 0.89189189 0.94594595 1.
|
|
0.92105263 1. 0.92105263 0.94285714]
|
|
|
|
mean value: 0.954239088712773
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.36
|
|
|
|
Accuracy on Blind test: 0.89
|
|
|
|
Model_name: Random Forest2
|
|
Model func: RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000...05', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10,
|
|
oob_score=True, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.04049039 0.9767766 1.00475955 1.05536938 1.07132578 1.01439166
|
|
1.02572823 1.17338204 0.99154377 1.03597832]
|
|
|
|
mean value: 1.0389745712280274
|
|
|
|
key: score_time
|
|
value: [0.21255541 0.28017068 0.26342106 0.19300008 0.25265646 0.26558733
|
|
0.24214935 0.19044924 0.26128864 0.27617431]
|
|
|
|
mean value: 0.24374525547027587
|
|
|
|
key: test_mcc
|
|
value: [0.94511009 0.97220047 0.97222222 0.8594125 0.9451949 0.97220047
|
|
0.91766294 1. 0.91766294 0.94440028]
|
|
|
|
mean value: 0.9446066796170173
|
|
|
|
key: train_mcc
|
|
value: [0.97511365 0.97819122 0.97819122 0.98117981 0.981277 0.97497646
|
|
0.98439842 0.97822445 0.98439842 0.9751514 ]
|
|
|
|
mean value: 0.9791102046785018
|
|
|
|
key: test_accuracy
|
|
value: [0.97183099 0.98591549 0.98591549 0.92957746 0.97183099 0.98591549
|
|
0.95714286 1. 0.95714286 0.97142857]
|
|
|
|
mean value: 0.9716700201207243
|
|
|
|
key: train_accuracy
|
|
value: [0.98740157 0.98897638 0.98897638 0.99055118 0.99055118 0.98740157
|
|
0.99213836 0.98899371 0.99213836 0.98742138]
|
|
|
|
mean value: 0.9894550091615907
|
|
|
|
key: test_fscore
|
|
value: [0.97297297 0.98630137 0.98591549 0.92753623 0.97222222 0.98550725
|
|
0.95890411 1. 0.95890411 0.97058824]
|
|
|
|
mean value: 0.9718851990749025
|
|
|
|
key: train_fscore
|
|
value: [0.98753894 0.98907956 0.98907956 0.990625 0.99065421 0.98753894
|
|
0.99219969 0.98911353 0.99219969 0.98757764]
|
|
|
|
mean value: 0.9895606759645655
|
|
|
|
key: test_precision
|
|
value: [0.94736842 0.97297297 1. 0.94117647 0.94594595 1.
|
|
0.92105263 1. 0.92105263 1. ]
|
|
|
|
mean value: 0.9649569073717681
|
|
|
|
key: train_precision
|
|
value: [0.97538462 0.97839506 0.97839506 0.98447205 0.98148148 0.97839506
|
|
0.98452012 0.97846154 0.98452012 0.97546012]
|
|
|
|
mean value: 0.9799485240579667
|
|
|
|
key: test_recall
|
|
value: [1. 1. 0.97222222 0.91428571 1. 0.97142857
|
|
1. 1. 1. 0.94285714]
|
|
|
|
mean value: 0.9800793650793651
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 0.99685535 1. 0.99685535
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9993710691823899
|
|
|
|
key: test_roc_auc
|
|
value: [0.97142857 0.98571429 0.98611111 0.92936508 0.97222222 0.98571429
|
|
0.95714286 1. 0.95714286 0.97142857]
|
|
|
|
mean value: 0.9716269841269841
|
|
|
|
key: train_roc_auc
|
|
value: [0.98742138 0.98899371 0.98899371 0.99054124 0.99053628 0.98738666
|
|
0.99213836 0.98899371 0.99213836 0.98742138]
|
|
|
|
mean value: 0.9894564807650339
|
|
|
|
key: test_jcc
|
|
value: [0.94736842 0.97297297 0.97222222 0.86486486 0.94594595 0.97142857
|
|
0.92105263 1. 0.92105263 0.94285714]
|
|
|
|
mean value: 0.9459765404502246
|
|
|
|
key: train_jcc
|
|
value: [0.97538462 0.97839506 0.97839506 0.98142415 0.98148148 0.97538462
|
|
0.98452012 0.97846154 0.98452012 0.97546012]
|
|
|
|
mean value: 0.9793426893153258
|
|
|
|
MCC on Blind test: 0.36
|
|
|
|
Accuracy on Blind test: 0.88
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.02535391 0.01058316 0.01061893 0.01099873 0.01078463 0.01064086
|
|
0.01167011 0.01053834 0.01053047 0.01068544]
|
|
|
|
mean value: 0.012240457534790038
|
|
|
|
key: score_time
|
|
value: [0.01071906 0.00890875 0.0089922 0.00990939 0.0088563 0.00895762
|
|
0.00901484 0.00889277 0.00889587 0.00956297]
|
|
|
|
mean value: 0.009270977973937989
|
|
|
|
key: test_mcc
|
|
value: [0.43675542 0.69047619 0.63383658 0.57777778 0.72937021 0.60555556
|
|
0.42857143 0.58321184 0.48650924 0.54643744]
|
|
|
|
mean value: 0.5718501681811895
|
|
|
|
key: train_mcc
|
|
value: [0.63874102 0.60335591 0.61928923 0.61308668 0.6193916 0.61325352
|
|
0.63907615 0.61713385 0.61646194 0.62924202]
|
|
|
|
mean value: 0.6209031925514391
|
|
|
|
key: test_accuracy
|
|
value: [0.71830986 0.84507042 0.81690141 0.78873239 0.85915493 0.8028169
|
|
0.71428571 0.78571429 0.74285714 0.77142857]
|
|
|
|
mean value: 0.7845271629778672
|
|
|
|
key: train_accuracy
|
|
value: [0.81889764 0.8015748 0.80944882 0.80629921 0.80944882 0.80629921
|
|
0.81918239 0.8081761 0.8081761 0.81446541]
|
|
|
|
mean value: 0.8101968503937008
|
|
|
|
key: test_fscore
|
|
value: [0.72972973 0.84507042 0.82191781 0.78873239 0.86842105 0.8
|
|
0.71428571 0.80519481 0.75 0.75757576]
|
|
|
|
mean value: 0.7880927684538173
|
|
|
|
key: train_fscore
|
|
value: [0.82334869 0.80373832 0.8124031 0.81047766 0.81355932 0.81105991
|
|
0.82334869 0.81288344 0.80634921 0.81733746]
|
|
|
|
mean value: 0.8134505798200699
|
|
|
|
key: test_precision
|
|
value: [0.71052632 0.85714286 0.81081081 0.77777778 0.80487805 0.8
|
|
0.71428571 0.73809524 0.72972973 0.80645161]
|
|
|
|
mean value: 0.7749698105315315
|
|
|
|
key: train_precision
|
|
value: [0.80239521 0.79384615 0.79878049 0.79456193 0.79758308 0.79279279
|
|
0.8048048 0.79341317 0.81410256 0.80487805]
|
|
|
|
mean value: 0.7997158250470955
|
|
|
|
key: test_recall
|
|
value: [0.75 0.83333333 0.83333333 0.8 0.94285714 0.8
|
|
0.71428571 0.88571429 0.77142857 0.71428571]
|
|
|
|
mean value: 0.8045238095238095
|
|
|
|
key: train_recall
|
|
value: [0.84542587 0.81388013 0.82649842 0.82704403 0.83018868 0.83018868
|
|
0.8427673 0.83333333 0.79874214 0.83018868]
|
|
|
|
mean value: 0.8278257246592465
|
|
|
|
key: test_roc_auc
|
|
value: [0.71785714 0.8452381 0.81666667 0.78888889 0.86031746 0.80277778
|
|
0.71428571 0.78571429 0.74285714 0.77142857]
|
|
|
|
mean value: 0.7846031746031746
|
|
|
|
key: train_roc_auc
|
|
value: [0.81893935 0.80159415 0.80947563 0.80626649 0.80941611 0.80626153
|
|
0.81918239 0.8081761 0.8081761 0.81446541]
|
|
|
|
mean value: 0.810195325675059
|
|
|
|
key: test_jcc
|
|
value: [0.57446809 0.73170732 0.69767442 0.65116279 0.76744186 0.66666667
|
|
0.55555556 0.67391304 0.6 0.6097561 ]
|
|
|
|
mean value: 0.6528345835208454
|
|
|
|
key: train_jcc
|
|
value: [0.6997389 0.671875 0.68407311 0.68134715 0.68571429 0.68217054
|
|
0.6997389 0.68475452 0.67553191 0.69109948]
|
|
|
|
mean value: 0.6856043805744364
|
|
|
|
MCC on Blind test: 0.22
|
|
|
|
Accuracy on Blind test: 0.69
|
|
|
|
Model_name: XGBoost
|
|
Model func: XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000...
|
|
interaction_constraints=None, learning_rate=None,
|
|
max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan,
|
|
monotone_constraints=None, n_estimators=100,
|
|
n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None,
|
|
reg_lambda=None, scale_pos_weight=None,
|
|
subsample=None, tree_method=None,
|
|
use_label_encoder=False,
|
|
validate_parameters=None, verbosity=0))])
|
|
|
|
key: fit_time
|
|
value: [0.09717965 0.07555246 0.07741308 0.07850742 0.07405233 0.07696462
|
|
0.07409954 0.11014795 0.07720542 0.08413363]
|
|
|
|
mean value: 0.08252561092376709
|
|
|
|
key: score_time
|
|
value: [0.0112009 0.01157093 0.01143789 0.01287317 0.01079273 0.01109099
|
|
0.0111444 0.01221967 0.01136827 0.0112431 ]
|
|
|
|
mean value: 0.011494207382202148
|
|
|
|
key: test_mcc
|
|
value: [0.94511009 1. 0.97222222 0.9451949 0.9451949 1.
|
|
0.94440028 1. 0.91766294 0.97182532]
|
|
|
|
mean value: 0.9641610639455782
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.97183099 1. 0.98591549 0.97183099 0.97183099 1.
|
|
0.97142857 1. 0.95714286 0.98571429]
|
|
|
|
mean value: 0.981569416498994
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.97297297 1. 0.98591549 0.97222222 0.97222222 1.
|
|
0.97222222 1. 0.95890411 0.98591549]
|
|
|
|
mean value: 0.9820374735144174
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.94736842 1. 1. 0.94594595 0.94594595 1.
|
|
0.94594595 1. 0.92105263 0.97222222]
|
|
|
|
mean value: 0.9678481112691639
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 0.97222222 1. 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9972222222222222
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.97142857 1. 0.98611111 0.97222222 0.97222222 1.
|
|
0.97142857 1. 0.95714286 0.98571429]
|
|
|
|
mean value: 0.9816269841269841
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.94736842 1. 0.97222222 0.94594595 0.94594595 1.
|
|
0.94594595 1. 0.92105263 0.97222222]
|
|
|
|
mean value: 0.9650703334913862
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.32
|
|
|
|
Accuracy on Blind test: 0.88
|
|
|
|
Model_name: LDA
|
|
Model func: LinearDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LinearDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.04567671 0.0669713 0.04753923 0.06718087 0.04788256 0.07548046
|
|
0.04902506 0.07634139 0.09599304 0.08620238]
|
|
|
|
mean value: 0.06582930088043212
|
|
|
|
key: score_time
|
|
value: [0.01886463 0.01216483 0.01325965 0.01217723 0.01881051 0.0122354
|
|
0.01873899 0.01875687 0.01861477 0.01226044]
|
|
|
|
mean value: 0.015588331222534179
|
|
|
|
key: test_mcc
|
|
value: [0.86237318 0.94511009 0.97220047 0.8594125 0.9451949 0.85952381
|
|
0.94440028 0.91465912 0.8660254 0.82992752]
|
|
|
|
mean value: 0.8998827271719281
|
|
|
|
key: train_mcc
|
|
value: [0.95592481 0.9433251 0.94649802 0.95276028 0.95287512 0.94649961
|
|
0.94976067 0.94654556 0.946583 0.9528349 ]
|
|
|
|
mean value: 0.9493607068575035
|
|
|
|
key: test_accuracy
|
|
value: [0.92957746 0.97183099 0.98591549 0.92957746 0.97183099 0.92957746
|
|
0.97142857 0.95714286 0.92857143 0.91428571]
|
|
|
|
mean value: 0.9489738430583501
|
|
|
|
key: train_accuracy
|
|
value: [0.97795276 0.97165354 0.97322835 0.97637795 0.97637795 0.97322835
|
|
0.97484277 0.97327044 0.97327044 0.97641509]
|
|
|
|
mean value: 0.974661763977616
|
|
|
|
key: test_fscore
|
|
value: [0.92753623 0.97297297 0.98630137 0.92753623 0.97222222 0.92957746
|
|
0.97222222 0.95774648 0.93333333 0.91176471]
|
|
|
|
mean value: 0.9491213233926206
|
|
|
|
key: train_fscore
|
|
value: [0.97798742 0.97151899 0.97305864 0.97645212 0.97622821 0.97314376
|
|
0.97468354 0.97322835 0.97314376 0.97637795]
|
|
|
|
mean value: 0.9745822737574066
|
|
|
|
key: test_precision
|
|
value: [0.96969697 0.94736842 0.97297297 0.94117647 0.94594595 0.91666667
|
|
0.94594595 0.94444444 0.875 0.93939394]
|
|
|
|
mean value: 0.9398611776707751
|
|
|
|
key: train_precision
|
|
value: [0.97492163 0.97460317 0.97770701 0.97492163 0.98402556 0.97777778
|
|
0.98089172 0.97476341 0.97777778 0.97791798]
|
|
|
|
mean value: 0.9775307663579518
|
|
|
|
key: test_recall
|
|
value: [0.88888889 1. 1. 0.91428571 1. 0.94285714
|
|
1. 0.97142857 1. 0.88571429]
|
|
|
|
mean value: 0.9603174603174603
|
|
|
|
key: train_recall
|
|
value: [0.98107256 0.96845426 0.96845426 0.97798742 0.96855346 0.96855346
|
|
0.96855346 0.97169811 0.96855346 0.97484277]
|
|
|
|
mean value: 0.9716723210919985
|
|
|
|
key: test_roc_auc
|
|
value: [0.93015873 0.97142857 0.98571429 0.92936508 0.97222222 0.9297619
|
|
0.97142857 0.95714286 0.92857143 0.91428571]
|
|
|
|
mean value: 0.9490079365079365
|
|
|
|
key: train_roc_auc
|
|
value: [0.97795766 0.97164851 0.97322084 0.97637541 0.97639029 0.97323572
|
|
0.97484277 0.97327044 0.97327044 0.97641509]
|
|
|
|
mean value: 0.9746627184889789
|
|
|
|
key: test_jcc
|
|
value: [0.86486486 0.94736842 0.97297297 0.86486486 0.94594595 0.86842105
|
|
0.94594595 0.91891892 0.875 0.83783784]
|
|
|
|
mean value: 0.9042140825035562
|
|
|
|
key: train_jcc
|
|
value: [0.95692308 0.94461538 0.94753086 0.95398773 0.95356037 0.94769231
|
|
0.95061728 0.94785276 0.94769231 0.95384615]
|
|
|
|
mean value: 0.9504318241231953
|
|
|
|
MCC on Blind test: 0.27
|
|
|
|
Accuracy on Blind test: 0.79
|
|
|
|
Model_name: Multinomial
|
|
Model func: MultinomialNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MultinomialNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01599789 0.0108223 0.01035357 0.01029944 0.01019597 0.0102706
|
|
0.01015592 0.01026535 0.01045132 0.01049185]
|
|
|
|
mean value: 0.010930418968200684
|
|
|
|
key: score_time
|
|
value: [0.0279007 0.0092783 0.00870371 0.00873184 0.00875592 0.00890112
|
|
0.00881505 0.0089035 0.00900555 0.00883865]
|
|
|
|
mean value: 0.01078343391418457
|
|
|
|
key: test_mcc
|
|
value: [0.46507937 0.81050059 0.77565853 0.63643777 0.7380153 0.61005483
|
|
0.45732956 0.6882472 0.65821838 0.71428571]
|
|
|
|
mean value: 0.6553827252310759
|
|
|
|
key: train_mcc
|
|
value: [0.68012167 0.65766425 0.66118827 0.66469961 0.67205302 0.66469961
|
|
0.68033605 0.65895854 0.69197508 0.69506299]
|
|
|
|
mean value: 0.6726759089900314
|
|
|
|
key: test_accuracy
|
|
value: [0.73239437 0.90140845 0.88732394 0.81690141 0.85915493 0.8028169
|
|
0.72857143 0.84285714 0.82857143 0.85714286]
|
|
|
|
mean value: 0.8257142857142857
|
|
|
|
key: train_accuracy
|
|
value: [0.83937008 0.82834646 0.82992126 0.83149606 0.83464567 0.83149606
|
|
0.83962264 0.82861635 0.84433962 0.84433962]
|
|
|
|
mean value: 0.8352193829544892
|
|
|
|
key: test_fscore
|
|
value: [0.73239437 0.90909091 0.89189189 0.82191781 0.87179487 0.81081081
|
|
0.73239437 0.84931507 0.83333333 0.85714286]
|
|
|
|
mean value: 0.8310086283171368
|
|
|
|
key: train_fscore
|
|
value: [0.8440367 0.83256528 0.83486239 0.83763278 0.84210526 0.83763278
|
|
0.8440367 0.83459788 0.85157421 0.85419735]
|
|
|
|
mean value: 0.8413241318527402
|
|
|
|
key: test_precision
|
|
value: [0.74285714 0.85365854 0.86842105 0.78947368 0.79069767 0.76923077
|
|
0.72222222 0.81578947 0.81081081 0.85714286]
|
|
|
|
mean value: 0.8020304223794088
|
|
|
|
key: train_precision
|
|
value: [0.8189911 0.81137725 0.81008902 0.80938416 0.80691643 0.80938416
|
|
0.82142857 0.80645161 0.81375358 0.8033241 ]
|
|
|
|
mean value: 0.811109998487874
|
|
|
|
key: test_recall
|
|
value: [0.72222222 0.97222222 0.91666667 0.85714286 0.97142857 0.85714286
|
|
0.74285714 0.88571429 0.85714286 0.85714286]
|
|
|
|
mean value: 0.8639682539682539
|
|
|
|
key: train_recall
|
|
value: [0.87066246 0.85488959 0.86119874 0.86792453 0.88050314 0.86792453
|
|
0.86792453 0.86477987 0.89308176 0.91194969]
|
|
|
|
mean value: 0.8740838838957998
|
|
|
|
key: test_roc_auc
|
|
value: [0.73253968 0.90039683 0.88690476 0.81746032 0.86071429 0.80357143
|
|
0.72857143 0.84285714 0.82857143 0.85714286]
|
|
|
|
mean value: 0.8258730158730159
|
|
|
|
key: train_roc_auc
|
|
value: [0.83941928 0.82838819 0.82997044 0.8314386 0.83457334 0.8314386
|
|
0.83962264 0.82861635 0.84433962 0.84433962]
|
|
|
|
mean value: 0.8352146697617205
|
|
|
|
key: test_jcc
|
|
value: [0.57777778 0.83333333 0.80487805 0.69767442 0.77272727 0.68181818
|
|
0.57777778 0.73809524 0.71428571 0.75 ]
|
|
|
|
mean value: 0.7148367763200435
|
|
|
|
key: train_jcc
|
|
value: [0.73015873 0.71315789 0.71653543 0.72062663 0.72727273 0.72062663
|
|
0.73015873 0.71614583 0.74151436 0.74550129]
|
|
|
|
mean value: 0.7261698258099161
|
|
|
|
MCC on Blind test: 0.25
|
|
|
|
Accuracy on Blind test: 0.72
|
|
|
|
Model_name: Passive Aggresive
|
|
Model func: PassiveAggressiveClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
PassiveAggressiveClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02183795 0.02980208 0.02798867 0.03096056 0.03057289 0.02582502
|
|
0.03036189 0.03267241 0.05372787 0.02473903]
|
|
|
|
mean value: 0.03084883689880371
|
|
|
|
key: score_time
|
|
value: [0.01134133 0.01184154 0.01175618 0.01180649 0.01205015 0.01181984
|
|
0.01184368 0.01185703 0.01455235 0.01725435]
|
|
|
|
mean value: 0.012612295150756837
|
|
|
|
key: test_mcc
|
|
value: [0.83095238 0.97220047 0.94511009 0.83240693 0.9451949 0.9186708
|
|
0.94440028 0.91766294 0.81649658 0.68572751]
|
|
|
|
mean value: 0.8808822882251532
|
|
|
|
key: train_mcc
|
|
value: [0.93442263 0.9433251 0.94968055 0.97188874 0.95909844 0.91598942
|
|
0.95614505 0.95389198 0.88121298 0.87813023]
|
|
|
|
mean value: 0.9343785115954156
|
|
|
|
key: test_accuracy
|
|
value: [0.91549296 0.98591549 0.97183099 0.91549296 0.97183099 0.95774648
|
|
0.97142857 0.95714286 0.9 0.82857143]
|
|
|
|
mean value: 0.9375452716297787
|
|
|
|
key: train_accuracy
|
|
value: [0.96692913 0.97165354 0.97480315 0.98582677 0.97952756 0.95748031
|
|
0.97798742 0.97641509 0.93710692 0.93710692]
|
|
|
|
mean value: 0.9664836824642202
|
|
|
|
key: test_fscore
|
|
value: [0.91666667 0.98630137 0.97297297 0.91666667 0.97222222 0.95522388
|
|
0.97222222 0.95890411 0.90909091 0.8 ]
|
|
|
|
mean value: 0.9360271019890729
|
|
|
|
key: train_fscore
|
|
value: [0.96744186 0.97151899 0.97460317 0.98600311 0.97946288 0.95652174
|
|
0.97777778 0.97695853 0.9408284 0.9339934 ]
|
|
|
|
mean value: 0.9665109851988075
|
|
|
|
key: test_precision
|
|
value: [0.91666667 0.97297297 0.94736842 0.89189189 0.94594595 1.
|
|
0.94594595 0.92105263 0.83333333 0.96 ]
|
|
|
|
mean value: 0.9335177809388335
|
|
|
|
key: train_precision
|
|
value: [0.95121951 0.97460317 0.98083067 0.97538462 0.98412698 0.98019802
|
|
0.98717949 0.95495495 0.88826816 0.98263889]
|
|
|
|
mean value: 0.9659404464486305
|
|
|
|
key: test_recall
|
|
value: [0.91666667 1. 1. 0.94285714 1. 0.91428571
|
|
1. 1. 1. 0.68571429]
|
|
|
|
mean value: 0.945952380952381
|
|
|
|
key: train_recall
|
|
value: [0.98422713 0.96845426 0.96845426 0.99685535 0.97484277 0.93396226
|
|
0.96855346 1. 1. 0.88993711]
|
|
|
|
mean value: 0.9685286590083924
|
|
|
|
key: test_roc_auc
|
|
value: [0.91547619 0.98571429 0.97142857 0.91587302 0.97222222 0.95714286
|
|
0.97142857 0.95714286 0.9 0.82857143]
|
|
|
|
mean value: 0.9375
|
|
|
|
key: train_roc_auc
|
|
value: [0.96695633 0.97164851 0.97479317 0.98580938 0.97953495 0.95751741
|
|
0.97798742 0.97641509 0.93710692 0.93710692]
|
|
|
|
mean value: 0.9664876098644922
|
|
|
|
key: test_jcc
|
|
value: [0.84615385 0.97297297 0.94736842 0.84615385 0.94594595 0.91428571
|
|
0.94594595 0.92105263 0.83333333 0.66666667]
|
|
|
|
mean value: 0.883987932408985
|
|
|
|
key: train_jcc
|
|
value: [0.93693694 0.94461538 0.9504644 0.97239264 0.95975232 0.91666667
|
|
0.95652174 0.95495495 0.88826816 0.87616099]
|
|
|
|
mean value: 0.9356734185744097
|
|
|
|
MCC on Blind test: 0.28
|
|
|
|
Accuracy on Blind test: 0.79
|
|
|
|
Model_name: Stochastic GDescent
|
|
Model func: SGDClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SGDClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02006912 0.02525282 0.0232029 0.02457309 0.0337832 0.02687883
|
|
0.01846766 0.01992679 0.01890898 0.02257085]
|
|
|
|
mean value: 0.023363423347473145
|
|
|
|
key: score_time
|
|
value: [0.0117867 0.01181984 0.01192808 0.01179981 0.01225471 0.01192546
|
|
0.01174068 0.01182675 0.01173258 0.01182985]
|
|
|
|
mean value: 0.011864447593688964
|
|
|
|
key: test_mcc
|
|
value: [0.80588933 0.74766718 0.94511009 0.83095238 0.91885703 0.88880092
|
|
0.55391171 0.91766294 0.94440028 0.81649658]
|
|
|
|
mean value: 0.8369748433837096
|
|
|
|
key: train_mcc
|
|
value: [0.95279762 0.78609706 0.94649802 0.94360876 0.91536605 0.94120401
|
|
0.66275141 0.86603117 0.946583 0.94047946]
|
|
|
|
mean value: 0.8901416569006897
|
|
|
|
key: test_accuracy
|
|
value: [0.90140845 0.85915493 0.97183099 0.91549296 0.95774648 0.94366197
|
|
0.75714286 0.95714286 0.97142857 0.9 ]
|
|
|
|
mean value: 0.9135010060362173
|
|
|
|
key: train_accuracy
|
|
value: [0.97637795 0.88188976 0.97322835 0.97165354 0.95590551 0.97007874
|
|
0.80974843 0.93081761 0.97327044 0.97012579]
|
|
|
|
mean value: 0.9413096122418659
|
|
|
|
key: test_fscore
|
|
value: [0.89855072 0.87804878 0.97297297 0.91428571 0.95890411 0.94444444
|
|
0.70175439 0.95522388 0.97222222 0.88888889]
|
|
|
|
mean value: 0.9085296124090697
|
|
|
|
key: train_fscore
|
|
value: [0.97622821 0.89421721 0.97305864 0.97133758 0.95783133 0.97081413
|
|
0.76864245 0.92715232 0.97314376 0.96979332]
|
|
|
|
mean value: 0.938221893854117
|
|
|
|
key: test_precision
|
|
value: [0.93939394 0.7826087 0.94736842 0.91428571 0.92105263 0.91891892
|
|
0.90909091 1. 0.94594595 1. ]
|
|
|
|
mean value: 0.9278665175919181
|
|
|
|
key: train_precision
|
|
value: [0.98089172 0.80867347 0.97770701 0.98387097 0.91907514 0.94894895
|
|
0.9804878 0.97902098 0.97777778 0.9807074 ]
|
|
|
|
mean value: 0.9537161213877158
|
|
|
|
key: test_recall
|
|
value: [0.86111111 1. 1. 0.91428571 1. 0.97142857
|
|
0.57142857 0.91428571 1. 0.8 ]
|
|
|
|
mean value: 0.9032539682539682
|
|
|
|
key: train_recall
|
|
value: [0.97160883 1. 0.96845426 0.9591195 1. 0.99371069
|
|
0.63207547 0.88050314 0.96855346 0.9591195 ]
|
|
|
|
mean value: 0.9333144852488939
|
|
|
|
key: test_roc_auc
|
|
value: [0.90198413 0.85714286 0.97142857 0.91547619 0.95833333 0.94404762
|
|
0.75714286 0.95714286 0.97142857 0.9 ]
|
|
|
|
mean value: 0.9134126984126983
|
|
|
|
key: train_roc_auc
|
|
value: [0.97637045 0.88207547 0.97322084 0.97167331 0.95583596 0.97004147
|
|
0.80974843 0.93081761 0.97327044 0.97012579]
|
|
|
|
mean value: 0.9413179771045375
|
|
|
|
key: test_jcc
|
|
value: [0.81578947 0.7826087 0.94736842 0.84210526 0.92105263 0.89473684
|
|
0.54054054 0.91428571 0.94594595 0.8 ]
|
|
|
|
mean value: 0.8404433528003322
|
|
|
|
key: train_jcc
|
|
value: [0.95356037 0.80867347 0.94753086 0.94427245 0.91907514 0.94328358
|
|
0.6242236 0.86419753 0.94769231 0.94135802]
|
|
|
|
mean value: 0.8893867343253306
|
|
|
|
MCC on Blind test: 0.23
|
|
|
|
Accuracy on Blind test: 0.89
|
|
|
|
Model_name: AdaBoost Classifier
|
|
Model func: AdaBoostClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', AdaBoostClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.21386051 0.19594049 0.19607854 0.19648504 0.19724059 0.19636226
|
|
0.19716811 0.19640207 0.19998503 0.19780946]
|
|
|
|
mean value: 0.19873321056365967
|
|
|
|
key: score_time
|
|
value: [0.015908 0.01565051 0.01572204 0.01579976 0.01585078 0.01635671
|
|
0.01572776 0.01586628 0.01603317 0.01614976]
|
|
|
|
mean value: 0.015906476974487306
|
|
|
|
key: test_mcc
|
|
value: [0.94511009 1. 0.97222222 0.9451949 0.9451949 1.
|
|
0.94440028 1. 0.91766294 0.97182532]
|
|
|
|
mean value: 0.9641610639455782
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.97183099 1. 0.98591549 0.97183099 0.97183099 1.
|
|
0.97142857 1. 0.95714286 0.98571429]
|
|
|
|
mean value: 0.981569416498994
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.97297297 1. 0.98591549 0.97222222 0.97222222 1.
|
|
0.97222222 1. 0.95890411 0.98591549]
|
|
|
|
mean value: 0.9820374735144174
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.94736842 1. 1. 0.94594595 0.94594595 1.
|
|
0.94594595 1. 0.92105263 0.97222222]
|
|
|
|
mean value: 0.9678481112691639
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 0.97222222 1. 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9972222222222222
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.97142857 1. 0.98611111 0.97222222 0.97222222 1.
|
|
0.97142857 1. 0.95714286 0.98571429]
|
|
|
|
mean value: 0.9816269841269841
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.94736842 1. 0.97222222 0.94594595 0.94594595 1.
|
|
0.94594595 1. 0.92105263 0.97222222]
|
|
|
|
mean value: 0.9650703334913862
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.32
|
|
|
|
Accuracy on Blind test: 0.87
|
|
|
|
Model_name: Bagging Classifier
|
|
Model func: BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
BaggingClassifier(n_jobs=10, oob_score=True,
|
|
random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.08755255 0.08615255 0.0739615 0.10134792 0.08034801 0.07975864
|
|
0.09994006 0.09957004 0.10951209 0.10598111]
|
|
|
|
mean value: 0.09241244792938233
|
|
|
|
key: score_time
|
|
value: [0.02680802 0.02740717 0.03420234 0.02961206 0.02757835 0.03313065
|
|
0.03581285 0.02277327 0.03594804 0.04103637]
|
|
|
|
mean value: 0.03143091201782226
|
|
|
|
key: test_mcc
|
|
value: [0.97220047 1. 0.97222222 0.9451949 0.9451949 1.
|
|
0.91766294 1. 0.94440028 0.97182532]
|
|
|
|
mean value: 0.9668701014726617
|
|
|
|
key: train_mcc
|
|
value: [0.99685535 0.99372055 0.99685535 0.98742101 0.99685531 0.99059524
|
|
0.99686027 0.99373035 0.99686027 0.99061012]
|
|
|
|
mean value: 0.9940363821854736
|
|
|
|
key: test_accuracy
|
|
value: [0.98591549 1. 0.98591549 0.97183099 0.97183099 1.
|
|
0.95714286 1. 0.97142857 0.98571429]
|
|
|
|
mean value: 0.9829778672032193
|
|
|
|
key: train_accuracy
|
|
value: [0.9984252 0.99685039 0.9984252 0.99370079 0.9984252 0.99527559
|
|
0.99842767 0.99685535 0.99842767 0.99528302]
|
|
|
|
mean value: 0.9970096072896548
|
|
|
|
key: test_fscore
|
|
value: [0.98630137 1. 0.98591549 0.97222222 0.97222222 1.
|
|
0.95890411 1. 0.97222222 0.98591549]
|
|
|
|
mean value: 0.9833703132034214
|
|
|
|
key: train_fscore
|
|
value: [0.9984252 0.99685535 0.9984252 0.99373041 0.99843014 0.99530516
|
|
0.9984252 0.9968652 0.99843014 0.99530516]
|
|
|
|
mean value: 0.9970197158961464
|
|
|
|
key: test_precision
|
|
value: [0.97297297 1. 1. 0.94594595 0.94594595 1.
|
|
0.92105263 1. 0.94594595 0.97222222]
|
|
|
|
mean value: 0.970408566461198
|
|
|
|
key: train_precision
|
|
value: [0.99685535 0.99373041 0.99685535 0.990625 0.9968652 0.99065421
|
|
1. 0.99375 0.9968652 0.99065421]
|
|
|
|
mean value: 0.9946854918085875
|
|
|
|
key: test_recall
|
|
value: [1. 1. 0.97222222 1. 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9972222222222222
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 0.99685535 1. 1.
|
|
0.99685535 1. 1. 1. ]
|
|
|
|
mean value: 0.9993710691823899
|
|
|
|
key: test_roc_auc
|
|
value: [0.98571429 1. 0.98611111 0.97222222 0.97222222 1.
|
|
0.95714286 1. 0.97142857 0.98571429]
|
|
|
|
mean value: 0.9830555555555556
|
|
|
|
key: train_roc_auc
|
|
value: [0.99842767 0.99685535 0.99842767 0.99369581 0.99842271 0.99526814
|
|
0.99842767 0.99685535 0.99842767 0.99528302]
|
|
|
|
mean value: 0.9970091066007976
|
|
|
|
key: test_jcc
|
|
value: [0.97297297 1. 0.97222222 0.94594595 0.94594595 1.
|
|
0.92105263 1. 0.94594595 0.97222222]
|
|
|
|
mean value: 0.9676307886834202
|
|
|
|
key: train_jcc
|
|
value: [0.99685535 0.99373041 0.99685535 0.98753894 0.9968652 0.99065421
|
|
0.99685535 0.99375 0.9968652 0.99065421]
|
|
|
|
mean value: 0.9940624204807793
|
|
|
|
MCC on Blind test: 0.28
|
|
|
|
Accuracy on Blind test: 0.86
|
|
|
|
Model_name: Gaussian Process
|
|
Model func: GaussianProcessClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianProcessClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.25592828 0.28852034 0.26021767 0.28997707 0.26665974 0.26977062
|
|
0.26859713 0.1861093 0.16106367 0.26376057]
|
|
|
|
mean value: 0.25106043815612794
|
|
|
|
key: score_time
|
|
value: [0.02861762 0.02894425 0.02914667 0.02907515 0.02898312 0.03097677
|
|
0.03856277 0.02931809 0.02283049 0.03237629]
|
|
|
|
mean value: 0.02988312244415283
|
|
|
|
key: test_mcc
|
|
value: [0.9186708 0.91580648 0.88862624 0.74940548 0.86802778 0.85952381
|
|
0.84102145 0.84102145 0.84102145 0.80295507]
|
|
|
|
mean value: 0.8526079995133014
|
|
|
|
key: train_mcc
|
|
value: [0.9842961 0.98112197 0.98425689 0.97177328 0.97169643 0.98747967
|
|
0.9812097 0.99373035 0.98432053 0.9812097 ]
|
|
|
|
mean value: 0.9821094625606624
|
|
|
|
key: test_accuracy
|
|
value: [0.95774648 0.95774648 0.94366197 0.87323944 0.92957746 0.92957746
|
|
0.91428571 0.91428571 0.91428571 0.9 ]
|
|
|
|
mean value: 0.923440643863179
|
|
|
|
key: train_accuracy
|
|
value: [0.99212598 0.99055118 0.99212598 0.98582677 0.98582677 0.99370079
|
|
0.99056604 0.99685535 0.99213836 0.99056604]
|
|
|
|
mean value: 0.9910283266478482
|
|
|
|
key: test_fscore
|
|
value: [0.96 0.95890411 0.94594595 0.87671233 0.93333333 0.92957746
|
|
0.92105263 0.92105263 0.92105263 0.90410959]
|
|
|
|
mean value: 0.9271740666202114
|
|
|
|
key: train_fscore
|
|
value: [0.99215071 0.99056604 0.99212598 0.98595944 0.98591549 0.99375
|
|
0.990625 0.9968652 0.99217527 0.990625 ]
|
|
|
|
mean value: 0.991075813738669
|
|
|
|
key: test_precision
|
|
value: [0.92307692 0.94594595 0.92105263 0.84210526 0.875 0.91666667
|
|
0.85365854 0.85365854 0.85365854 0.86842105]
|
|
|
|
mean value: 0.8853244092814054
|
|
|
|
key: train_precision
|
|
value: [0.9875 0.98746082 0.99056604 0.97832817 0.98130841 0.98757764
|
|
0.98447205 0.99375 0.98753894 0.98447205]
|
|
|
|
mean value: 0.9862974117312842
|
|
|
|
key: test_recall
|
|
value: [1. 0.97222222 0.97222222 0.91428571 1. 0.94285714
|
|
1. 1. 1. 0.94285714]
|
|
|
|
mean value: 0.9744444444444444
|
|
|
|
key: train_recall
|
|
value: [0.99684543 0.99369085 0.99369085 0.99371069 0.99056604 1.
|
|
0.99685535 1. 0.99685535 0.99685535]
|
|
|
|
mean value: 0.9959069896633137
|
|
|
|
key: test_roc_auc
|
|
value: [0.95714286 0.95753968 0.94325397 0.87380952 0.93055556 0.9297619
|
|
0.91428571 0.91428571 0.91428571 0.9 ]
|
|
|
|
mean value: 0.9234920634920635
|
|
|
|
key: train_roc_auc
|
|
value: [0.9921334 0.99055612 0.99212844 0.98581434 0.9858193 0.99369085
|
|
0.99056604 0.99685535 0.99213836 0.99056604]
|
|
|
|
mean value: 0.9910268238001706
|
|
|
|
key: test_jcc
|
|
value: [0.92307692 0.92105263 0.8974359 0.7804878 0.875 0.86842105
|
|
0.85365854 0.85365854 0.85365854 0.825 ]
|
|
|
|
mean value: 0.8651449919357493
|
|
|
|
key: train_jcc
|
|
value: [0.98442368 0.98130841 0.984375 0.97230769 0.97222222 0.98757764
|
|
0.98142415 0.99375 0.98447205 0.98142415]
|
|
|
|
mean value: 0.9823284988411944
|
|
|
|
MCC on Blind test: 0.22
|
|
|
|
Accuracy on Blind test: 0.82
|
|
|
|
Model_name: Gradient Boosting
|
|
Model func: GradientBoostingClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GradientBoostingClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.82525992 0.81345201 0.79970884 0.79577041 0.80171442 0.79265976
|
|
0.80652809 0.81720066 0.80532098 0.81509423]
|
|
|
|
mean value: 0.8072709321975708
|
|
|
|
key: score_time
|
|
value: [0.01005793 0.01052213 0.00961161 0.00976348 0.00990582 0.00951743
|
|
0.00955701 0.00936961 0.01069903 0.01045966]
|
|
|
|
mean value: 0.009946370124816894
|
|
|
|
key: test_mcc
|
|
value: [0.94511009 1. 0.97222222 0.9451949 0.9451949 1.
|
|
0.91766294 1. 0.94440028 0.97182532]
|
|
|
|
mean value: 0.9641610639455782
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.97183099 1. 0.98591549 0.97183099 0.97183099 1.
|
|
0.95714286 1. 0.97142857 0.98571429]
|
|
|
|
mean value: 0.981569416498994
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.97297297 1. 0.98591549 0.97222222 0.97222222 1.
|
|
0.95890411 1. 0.97222222 0.98591549]
|
|
|
|
mean value: 0.9820374735144174
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.94736842 1. 1. 0.94594595 0.94594595 1.
|
|
0.92105263 1. 0.94594595 0.97222222]
|
|
|
|
mean value: 0.9678481112691639
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 0.97222222 1. 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9972222222222222
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.97142857 1. 0.98611111 0.97222222 0.97222222 1.
|
|
0.95714286 1. 0.97142857 0.98571429]
|
|
|
|
mean value: 0.9816269841269841
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.94736842 1. 0.97222222 0.94594595 0.94594595 1.
|
|
0.92105263 1. 0.94594595 0.97222222]
|
|
|
|
mean value: 0.9650703334913862
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.3
|
|
|
|
Accuracy on Blind test: 0.86
|
|
|
|
Model_name: QDA
|
|
Model func: QuadraticDiscriminantAnalysis()
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', QuadraticDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.03670049 0.03937697 0.03461599 0.03756452 0.03761768 0.03710341
|
|
0.03756785 0.03759885 0.03583026 0.03527403]
|
|
|
|
mean value: 0.036925005912780764
|
|
|
|
key: score_time
|
|
value: [0.0131886 0.01521206 0.01581287 0.01318264 0.01519966 0.01524711
|
|
0.01334047 0.01519775 0.01604915 0.01971245]
|
|
|
|
mean value: 0.015214276313781739
|
|
|
|
key: test_mcc
|
|
value: [1. 0.94511009 0.94365079 0.88862624 1. 0.88730159
|
|
1. 0.97182532 0.97182532 0.91465912]
|
|
|
|
mean value: 0.9522998461647237
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [1. 0.97183099 0.97183099 0.94366197 1. 0.94366197
|
|
1. 0.98571429 0.98571429 0.95714286]
|
|
|
|
mean value: 0.9759557344064387
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [1. 0.97297297 0.97222222 0.94117647 1. 0.94285714
|
|
1. 0.98591549 0.98591549 0.95652174]
|
|
|
|
mean value: 0.9757581533686501
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 0.94736842 0.97222222 0.96969697 1. 0.94285714
|
|
1. 0.97222222 0.97222222 0.97058824]
|
|
|
|
mean value: 0.9747177435567529
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 0.97222222 0.91428571 1. 0.94285714
|
|
1. 1. 1. 0.94285714]
|
|
|
|
mean value: 0.9772222222222222
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [1. 0.97142857 0.9718254 0.94325397 1. 0.94365079
|
|
1. 0.98571429 0.98571429 0.95714286]
|
|
|
|
mean value: 0.9758730158730159
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [1. 0.94736842 0.94594595 0.88888889 1. 0.89189189
|
|
1. 0.97222222 0.97222222 0.91666667]
|
|
|
|
mean value: 0.953520625889047
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.13
|
|
|
|
Accuracy on Blind test: 0.94
|
|
|
|
Model_name: Ridge Classifier
|
|
Model func: RidgeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02871919 0.01736236 0.01696849 0.01708817 0.03683257 0.01714468
|
|
0.0171876 0.04192972 0.03505635 0.04124236]
|
|
|
|
mean value: 0.02695314884185791
|
|
|
|
key: score_time
|
|
value: [0.01955438 0.01248503 0.01221538 0.0124445 0.01955557 0.01251626
|
|
0.01397395 0.02384067 0.02429843 0.02438879]
|
|
|
|
mean value: 0.017527294158935548
|
|
|
|
key: test_mcc
|
|
value: [0.86237318 0.97220047 1. 0.8594125 0.88880092 0.94511009
|
|
0.91465912 0.94285714 0.91465912 0.82992752]
|
|
|
|
mean value: 0.9130000059173128
|
|
|
|
key: train_mcc
|
|
value: [0.94960617 0.92137196 0.93072627 0.9559054 0.93408743 0.93078373
|
|
0.93098334 0.92778765 0.92778765 0.94025622]
|
|
|
|
mean value: 0.9349295824637631
|
|
|
|
key: test_accuracy
|
|
value: [0.92957746 0.98591549 1. 0.92957746 0.94366197 0.97183099
|
|
0.95714286 0.97142857 0.95714286 0.91428571]
|
|
|
|
mean value: 0.956056338028169
|
|
|
|
key: train_accuracy
|
|
value: [0.97480315 0.96062992 0.96535433 0.97795276 0.96692913 0.96535433
|
|
0.96540881 0.96383648 0.96383648 0.97012579]
|
|
|
|
mean value: 0.9674231169217056
|
|
|
|
key: test_fscore
|
|
value: [0.92753623 0.98630137 1. 0.92753623 0.94444444 0.97058824
|
|
0.95774648 0.97142857 0.95774648 0.91176471]
|
|
|
|
mean value: 0.9555092748427095
|
|
|
|
key: train_fscore
|
|
value: [0.97476341 0.96025437 0.96518987 0.97798742 0.96661367 0.96518987
|
|
0.96507937 0.96354992 0.96354992 0.97007874]
|
|
|
|
mean value: 0.9672256566432498
|
|
|
|
key: test_precision
|
|
value: [0.96969697 0.97297297 1. 0.94117647 0.91891892 1.
|
|
0.94444444 0.97142857 0.94444444 0.93939394]
|
|
|
|
mean value: 0.9602476731888496
|
|
|
|
key: train_precision
|
|
value: [0.97476341 0.96794872 0.96825397 0.97798742 0.97749196 0.97133758
|
|
0.97435897 0.97124601 0.97124601 0.97160883]
|
|
|
|
mean value: 0.972624287550512
|
|
|
|
key: test_recall
|
|
value: [0.88888889 1. 1. 0.91428571 0.97142857 0.94285714
|
|
0.97142857 0.97142857 0.97142857 0.88571429]
|
|
|
|
mean value: 0.9517460317460318
|
|
|
|
key: train_recall
|
|
value: [0.97476341 0.95268139 0.96214511 0.97798742 0.95597484 0.9591195
|
|
0.95597484 0.95597484 0.95597484 0.96855346]
|
|
|
|
mean value: 0.9619149653790449
|
|
|
|
key: test_roc_auc
|
|
value: [0.93015873 0.98571429 1. 0.92936508 0.94404762 0.97142857
|
|
0.95714286 0.97142857 0.95714286 0.91428571]
|
|
|
|
mean value: 0.9560714285714286
|
|
|
|
key: train_roc_auc
|
|
value: [0.97480309 0.96061742 0.96534928 0.9779527 0.96694641 0.96536416
|
|
0.96540881 0.96383648 0.96383648 0.97012579]
|
|
|
|
mean value: 0.967424062059798
|
|
|
|
key: test_jcc
|
|
value: [0.86486486 0.97297297 1. 0.86486486 0.89473684 0.94285714
|
|
0.91891892 0.94444444 0.91891892 0.83783784]
|
|
|
|
mean value: 0.9160416807785229
|
|
|
|
key: train_jcc
|
|
value: [0.95076923 0.9235474 0.93272171 0.95692308 0.93538462 0.93272171
|
|
0.93251534 0.92966361 0.92966361 0.94189602]
|
|
|
|
mean value: 0.9365806327778523
|
|
|
|
MCC on Blind test: 0.25
|
|
|
|
Accuracy on Blind test: 0.81
|
|
|
|
Model_name: Ridge ClassifierCV
|
|
Model func: RidgeClassifierCV(cv=10)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifierCV(cv=10))])
|
|
|
|
key: fit_time
|
|
value: [0.31593418 0.31997061 0.32746148 0.32468939 0.33370972 0.19828033
|
|
0.43811512 0.31964898 0.26189637 0.35649157]
|
|
|
|
mean value: 0.3196197748184204
|
|
|
|
key: score_time
|
|
value: [0.01891088 0.01906037 0.0190239 0.01910853 0.02047801 0.01225448
|
|
0.0189395 0.01237011 0.02282858 0.01899767]
|
|
|
|
mean value: 0.01819720268249512
|
|
|
|
key: test_mcc
|
|
value: [0.86237318 1. 1. 0.8594125 0.88880092 0.94511009
|
|
0.91465912 0.94285714 0.91465912 0.82992752]
|
|
|
|
mean value: 0.9157799593644673
|
|
|
|
key: train_mcc
|
|
value: [0.94960617 0.9401617 0.93072627 0.9559054 0.93408743 0.93078373
|
|
0.93098334 0.92778765 0.92778765 0.94025622]
|
|
|
|
mean value: 0.9368085564916796
|
|
|
|
key: test_accuracy
|
|
value: [0.92957746 1. 1. 0.92957746 0.94366197 0.97183099
|
|
0.95714286 0.97142857 0.95714286 0.91428571]
|
|
|
|
mean value: 0.9574647887323944
|
|
|
|
key: train_accuracy
|
|
value: [0.97480315 0.97007874 0.96535433 0.97795276 0.96692913 0.96535433
|
|
0.96540881 0.96383648 0.96383648 0.97012579]
|
|
|
|
mean value: 0.9683679988114693
|
|
|
|
key: test_fscore
|
|
value: [0.92753623 1. 1. 0.92753623 0.94444444 0.97058824
|
|
0.95774648 0.97142857 0.95774648 0.91176471]
|
|
|
|
mean value: 0.9568791378564081
|
|
|
|
key: train_fscore
|
|
value: /home/tanu/git/LSHTM_analysis/scripts/ml/./embb_orig.py:155: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
ros_CT.sort_values(by = ['test_mcc'], ascending = False, inplace = True)
|
|
/home/tanu/git/LSHTM_analysis/scripts/ml/./embb_orig.py:158: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
ros_BT.sort_values(by = ['bts_mcc'], ascending = False, inplace = True)
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[0.97476341 0.9699842 0.96518987 0.97798742 0.96661367 0.96518987
|
|
0.96507937 0.96354992 0.96354992 0.97007874]
|
|
|
|
mean value: 0.968198639662511
|
|
|
|
key: test_precision
|
|
value: [0.96969697 1. 1. 0.94117647 0.91891892 1.
|
|
0.94444444 0.97142857 0.94444444 0.93939394]
|
|
|
|
mean value: 0.9629503758915524
|
|
|
|
key: train_precision
|
|
value: [0.97476341 0.97151899 0.96825397 0.97798742 0.97749196 0.97133758
|
|
0.97435897 0.97124601 0.97124601 0.97160883]
|
|
|
|
mean value: 0.9729813144898175
|
|
|
|
key: test_recall
|
|
value: [0.88888889 1. 1. 0.91428571 0.97142857 0.94285714
|
|
0.97142857 0.97142857 0.97142857 0.88571429]
|
|
|
|
mean value: 0.9517460317460318
|
|
|
|
key: train_recall
|
|
value: [0.97476341 0.96845426 0.96214511 0.97798742 0.95597484 0.9591195
|
|
0.95597484 0.95597484 0.95597484 0.96855346]
|
|
|
|
mean value: 0.9634922524452909
|
|
|
|
key: test_roc_auc
|
|
value: [0.93015873 1. 1. 0.92936508 0.94404762 0.97142857
|
|
0.95714286 0.97142857 0.95714286 0.91428571]
|
|
|
|
mean value: 0.9575
|
|
|
|
key: train_roc_auc
|
|
value: [0.97480309 0.97007619 0.96534928 0.9779527 0.96694641 0.96536416
|
|
0.96540881 0.96383648 0.96383648 0.97012579]
|
|
|
|
mean value: 0.9683699382973235
|
|
|
|
key: test_jcc
|
|
value: [0.86486486 1. 1. 0.86486486 0.89473684 0.94285714
|
|
0.91891892 0.94444444 0.91891892 0.83783784]
|
|
|
|
mean value: 0.9187443834812256
|
|
|
|
key: train_jcc
|
|
value: [0.95076923 0.94171779 0.93272171 0.95692308 0.93538462 0.93272171
|
|
0.93251534 0.92966361 0.92966361 0.94189602]
|
|
|
|
mean value: 0.9383976718577945
|
|
|
|
MCC on Blind test: 0.25
|
|
|
|
Accuracy on Blind test: 0.81
|
|
|
|
Model_name: Logistic Regression
|
|
Model func: LogisticRegression(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegression(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02395487 0.03129625 0.03210497 0.02768111 0.02957845 0.04382205
|
|
0.03382683 0.04950786 0.05163479 0.04825044]
|
|
|
|
mean value: 0.03716576099395752
|
|
|
|
key: score_time
|
|
value: [0.01181412 0.01170874 0.01187897 0.01177502 0.01178217 0.01199937
|
|
0.01903296 0.01207232 0.01193404 0.01187944]
|
|
|
|
mean value: 0.012587714195251464
|
|
|
|
key: test_mcc
|
|
value: [0.68543653 0.78888889 0.80903983 0.80507649 0.80507649 1.
|
|
0.58655573 0.78888889 0.89893315 0.68888889]
|
|
|
|
mean value: 0.7856784875651004
|
|
|
|
key: train_mcc
|
|
value: [0.90744828 0.88526575 0.87287753 0.87190373 0.88403644 0.87185133
|
|
0.89526317 0.87279143 0.87279143 0.89526317]
|
|
|
|
mean value: 0.8829492264486184
|
|
|
|
key: test_accuracy
|
|
value: [0.84210526 0.89473684 0.89473684 0.89473684 0.89473684 1.
|
|
0.78947368 0.89473684 0.94736842 0.84210526]
|
|
|
|
mean value: 0.8894736842105263
|
|
|
|
key: train_accuracy
|
|
value: [0.95321637 0.94152047 0.93567251 0.93567251 0.94152047 0.93567251
|
|
0.94736842 0.93567251 0.93567251 0.94736842]
|
|
|
|
mean value: 0.9409356725146198
|
|
|
|
key: test_fscore
|
|
value: [0.82352941 0.88888889 0.9 0.875 0.875 1.
|
|
0.81818182 0.9 0.95238095 0.84210526]
|
|
|
|
mean value: 0.887508633437426
|
|
|
|
key: train_fscore
|
|
value: [0.95238095 0.93975904 0.93413174 0.93491124 0.94047619 0.93413174
|
|
0.94610778 0.93333333 0.93333333 0.94610778]
|
|
|
|
mean value: 0.9394673130188106
|
|
|
|
key: test_precision
|
|
value: [0.875 0.88888889 0.81818182 1. 1. 1.
|
|
0.75 0.9 0.90909091 0.88888889]
|
|
|
|
mean value: 0.9030050505050505
|
|
|
|
key: train_precision
|
|
value: [0.97560976 0.975 0.96296296 0.95180723 0.96341463 0.95121951
|
|
0.96341463 0.9625 0.9625 0.96341463]
|
|
|
|
mean value: 0.9631843362610333
|
|
|
|
key: test_recall
|
|
value: [0.77777778 0.88888889 1. 0.77777778 0.77777778 1.
|
|
0.9 0.9 1. 0.8 ]
|
|
|
|
mean value: 0.8822222222222222
|
|
|
|
key: train_recall
|
|
value: [0.93023256 0.90697674 0.90697674 0.91860465 0.91860465 0.91764706
|
|
0.92941176 0.90588235 0.90588235 0.92941176]
|
|
|
|
mean value: 0.9169630642954856
|
|
|
|
key: test_roc_auc
|
|
value: [0.83888889 0.89444444 0.9 0.88888889 0.88888889 1.
|
|
0.78333333 0.89444444 0.94444444 0.84444444]
|
|
|
|
mean value: 0.8877777777777778
|
|
|
|
key: train_roc_auc
|
|
value: [0.95335157 0.94172367 0.93584131 0.93577291 0.94165527 0.93556772
|
|
0.94726402 0.93549932 0.93549932 0.94726402]
|
|
|
|
mean value: 0.9409439124487005
|
|
|
|
key: test_jcc
|
|
value: [0.7 0.8 0.81818182 0.77777778 0.77777778 1.
|
|
0.69230769 0.81818182 0.90909091 0.72727273]
|
|
|
|
mean value: 0.802059052059052
|
|
|
|
key: train_jcc
|
|
value: [0.90909091 0.88636364 0.87640449 0.87777778 0.88764045 0.87640449
|
|
0.89772727 0.875 0.875 0.89772727]
|
|
|
|
mean value: 0.8859136306889116
|
|
|
|
MCC on Blind test: 0.24
|
|
|
|
Accuracy on Blind test: 0.76
|
|
|
|
Model_name: Logistic RegressionCV
|
|
Model func: LogisticRegressionCV(random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegressionCV(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.75773287 0.74368167 0.88063335 0.7733438 0.71527219 0.81070018
|
|
0.72211838 0.75942755 0.90827298 0.71872997]
|
|
|
|
mean value: 0.7789912939071655
|
|
|
|
key: score_time
|
|
value: [0.01832628 0.01247096 0.01224923 0.01197457 0.01687717 0.01200843
|
|
0.01196647 0.01214218 0.01520514 0.0153594 ]
|
|
|
|
mean value: 0.01385798454284668
|
|
|
|
key: test_mcc
|
|
value: [0.78888889 0.78888889 0.9 0.80507649 0.80507649 0.89893315
|
|
0.57777778 0.78888889 0.78888889 0.80903983]
|
|
|
|
mean value: 0.795145929003904
|
|
|
|
key: train_mcc
|
|
value: [0.97660739 1. 0.97660739 0.95321477 0.97660739 0.98837051
|
|
0.94157888 0.98837051 0.97660739 1. ]
|
|
|
|
mean value: 0.9777964229760016
|
|
|
|
key: test_accuracy
|
|
value: [0.89473684 0.89473684 0.94736842 0.89473684 0.89473684 0.94736842
|
|
0.78947368 0.89473684 0.89473684 0.89473684]
|
|
|
|
mean value: 0.8947368421052632
|
|
|
|
key: train_accuracy
|
|
value: [0.98830409 1. 0.98830409 0.97660819 0.98830409 0.99415205
|
|
0.97076023 0.99415205 0.98830409 1. ]
|
|
|
|
mean value: 0.9888888888888889
|
|
|
|
key: test_fscore
|
|
value: [0.88888889 0.88888889 0.94736842 0.875 0.875 0.95238095
|
|
0.8 0.9 0.9 0.88888889]
|
|
|
|
mean value: 0.8916416040100251
|
|
|
|
key: train_fscore
|
|
value: [0.98837209 1. 0.98837209 0.97674419 0.98837209 0.99408284
|
|
0.9704142 0.99408284 0.98823529 1. ]
|
|
|
|
mean value: 0.9888675640890731
|
|
|
|
key: test_precision
|
|
value: [0.88888889 0.88888889 0.9 1. 1. 0.90909091
|
|
0.8 0.9 0.9 1. ]
|
|
|
|
mean value: 0.9186868686868687
|
|
|
|
key: train_precision
|
|
value: [0.98837209 1. 0.98837209 0.97674419 0.98837209 1.
|
|
0.97619048 1. 0.98823529 1. ]
|
|
|
|
mean value: 0.9906286235424402
|
|
|
|
key: test_recall
|
|
value: [0.88888889 0.88888889 1. 0.77777778 0.77777778 1.
|
|
0.8 0.9 0.9 0.8 ]
|
|
|
|
mean value: 0.8733333333333333
|
|
|
|
key: train_recall
|
|
value: [0.98837209 1. 0.98837209 0.97674419 0.98837209 0.98823529
|
|
0.96470588 0.98823529 0.98823529 1. ]
|
|
|
|
mean value: 0.9871272229822161
|
|
|
|
key: test_roc_auc
|
|
value: [0.89444444 0.89444444 0.95 0.88888889 0.88888889 0.94444444
|
|
0.78888889 0.89444444 0.89444444 0.9 ]
|
|
|
|
mean value: 0.8938888888888888
|
|
|
|
key: train_roc_auc
|
|
value: [0.98830369 1. 0.98830369 0.97660739 0.98830369 0.99411765
|
|
0.97072503 0.99411765 0.98830369 1. ]
|
|
|
|
mean value: 0.9888782489740082
|
|
|
|
key: test_jcc
|
|
value: [0.8 0.8 0.9 0.77777778 0.77777778 0.90909091
|
|
0.66666667 0.81818182 0.81818182 0.8 ]
|
|
|
|
mean value: 0.8067676767676768
|
|
|
|
key: train_jcc
|
|
value: [0.97701149 1. 0.97701149 0.95454545 0.97701149 0.98823529
|
|
0.94252874 0.98823529 0.97674419 1. ]
|
|
|
|
mean value: 0.9781323447218065
|
|
|
|
MCC on Blind test: 0.21
|
|
|
|
Accuracy on Blind test: 0.75
|
|
|
|
Model_name: Gaussian NB
|
|
Model func: GaussianNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01315546 0.01119113 0.0091815 0.00966144 0.00872302 0.0090394
|
|
0.00876021 0.00876713 0.00875735 0.00854087]
|
|
|
|
mean value: 0.009577751159667969
|
|
|
|
key: score_time
|
|
value: [0.01169705 0.00926828 0.0088644 0.00899458 0.00899673 0.00860763
|
|
0.00848794 0.00871325 0.0084095 0.00863767]
|
|
|
|
mean value: 0.009067702293395995
|
|
|
|
key: test_mcc
|
|
value: [0.48934516 0.48934516 0.68543653 0.26257545 0.36666667 0.9
|
|
0.59554321 0.68888889 0.57777778 0.26666667]
|
|
|
|
mean value: 0.5322245520348511
|
|
|
|
key: train_mcc
|
|
value: [0.77391396 0.68829478 0.76087645 0.67564356 0.73250815 0.7184589
|
|
0.74764381 0.74224525 0.70050256 0.78049439]
|
|
|
|
mean value: 0.7320581797757397
|
|
|
|
key: test_accuracy
|
|
value: [0.73684211 0.73684211 0.84210526 0.63157895 0.68421053 0.94736842
|
|
0.78947368 0.84210526 0.78947368 0.63157895]
|
|
|
|
mean value: 0.763157894736842
|
|
|
|
key: train_accuracy
|
|
value: [0.88304094 0.83625731 0.87719298 0.83625731 0.85964912 0.85380117
|
|
0.87134503 0.86549708 0.84795322 0.88888889]
|
|
|
|
mean value: 0.8619883040935672
|
|
|
|
key: test_fscore
|
|
value: [0.66666667 0.66666667 0.82352941 0.53333333 0.66666667 0.94736842
|
|
0.77777778 0.84210526 0.8 0.63157895]
|
|
|
|
mean value: 0.7355693154454764
|
|
|
|
key: train_fscore
|
|
value: [0.875 0.81818182 0.86956522 0.82926829 0.84615385 0.83870968
|
|
0.8625 0.8516129 0.8375 0.89265537]
|
|
|
|
mean value: 0.8521147122286695
|
|
|
|
key: test_precision
|
|
value: [0.83333333 0.83333333 0.875 0.66666667 0.66666667 1.
|
|
0.875 0.88888889 0.8 0.66666667]
|
|
|
|
mean value: 0.8105555555555556
|
|
|
|
key: train_precision
|
|
value: [0.94594595 0.92647059 0.93333333 0.87179487 0.94285714 0.92857143
|
|
0.92 0.94285714 0.89333333 0.85869565]
|
|
|
|
mean value: 0.9163859439102406
|
|
|
|
key: test_recall
|
|
value: [0.55555556 0.55555556 0.77777778 0.44444444 0.66666667 0.9
|
|
0.7 0.8 0.8 0.6 ]
|
|
|
|
mean value: 0.68
|
|
|
|
key: train_recall
|
|
value: [0.81395349 0.73255814 0.81395349 0.79069767 0.76744186 0.76470588
|
|
0.81176471 0.77647059 0.78823529 0.92941176]
|
|
|
|
mean value: 0.7989192886456908
|
|
|
|
key: test_roc_auc
|
|
value: [0.72777778 0.72777778 0.83888889 0.62222222 0.68333333 0.95
|
|
0.79444444 0.84444444 0.78888889 0.63333333]
|
|
|
|
mean value: 0.7611111111111111
|
|
|
|
key: train_roc_auc
|
|
value: [0.88344733 0.83686731 0.87756498 0.83652531 0.86019152 0.85328317
|
|
0.87099863 0.86497948 0.84760602 0.88912449]
|
|
|
|
mean value: 0.8620588235294118
|
|
|
|
key: test_jcc
|
|
value: [0.5 0.5 0.7 0.36363636 0.5 0.9
|
|
0.63636364 0.72727273 0.66666667 0.46153846]
|
|
|
|
mean value: 0.5955477855477855
|
|
|
|
key: train_jcc
|
|
value: [0.77777778 0.69230769 0.76923077 0.70833333 0.73333333 0.72222222
|
|
0.75824176 0.74157303 0.72043011 0.80612245]
|
|
|
|
mean value: 0.7429572476661225
|
|
|
|
MCC on Blind test: 0.3
|
|
|
|
Accuracy on Blind test: 0.78
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.00893593 0.00896478 0.00906038 0.00892711 0.00910783 0.00901365
|
|
0.00889587 0.0092268 0.00915647 0.008919 ]
|
|
|
|
mean value: 0.009020781517028809
|
|
|
|
key: score_time
|
|
value: [0.00883269 0.00847888 0.00873971 0.0087831 0.0085485 0.00866008
|
|
0.00875974 0.00865173 0.00861263 0.0088172 ]
|
|
|
|
mean value: 0.00868842601776123
|
|
|
|
key: test_mcc
|
|
value: [0.05555556 0.26666667 0.57777778 0.02721655 0.36803496 0.47777778
|
|
0.41773368 0.4719399 0.26666667 0.15555556]
|
|
|
|
mean value: 0.3084925097927452
|
|
|
|
key: train_mcc
|
|
value: [0.61768697 0.55968039 0.5683567 0.61768697 0.5872471 0.5701417
|
|
0.63393269 0.52117999 0.58646061 0.58264941]
|
|
|
|
mean value: 0.5845022535735144
|
|
|
|
key: test_accuracy
|
|
value: [0.52631579 0.63157895 0.78947368 0.52631579 0.68421053 0.73684211
|
|
0.68421053 0.73684211 0.63157895 0.57894737]
|
|
|
|
mean value: 0.6526315789473685
|
|
|
|
key: train_accuracy
|
|
value: [0.80701754 0.77777778 0.78362573 0.80701754 0.78947368 0.78362573
|
|
0.8128655 0.76023392 0.78947368 0.78947368]
|
|
|
|
mean value: 0.7900584795321637
|
|
|
|
key: test_fscore
|
|
value: [0.52631579 0.63157895 0.77777778 0.30769231 0.625 0.73684211
|
|
0.625 0.76190476 0.63157895 0.6 ]
|
|
|
|
mean value: 0.6223690636848531
|
|
|
|
key: train_fscore
|
|
value: [0.79754601 0.7654321 0.77844311 0.79754601 0.7721519 0.77018634
|
|
0.79487179 0.75151515 0.76923077 0.775 ]
|
|
|
|
mean value: 0.7771923186833384
|
|
|
|
key: test_precision
|
|
value: [0.5 0.6 0.77777778 0.5 0.71428571 0.77777778
|
|
0.83333333 0.72727273 0.66666667 0.6 ]
|
|
|
|
mean value: 0.6697113997113997
|
|
|
|
key: train_precision
|
|
value: [0.84415584 0.81578947 0.80246914 0.84415584 0.84722222 0.81578947
|
|
0.87323944 0.775 0.84507042 0.82666667]
|
|
|
|
mean value: 0.8289558519526397
|
|
|
|
key: test_recall
|
|
value: [0.55555556 0.66666667 0.77777778 0.22222222 0.55555556 0.7
|
|
0.5 0.8 0.6 0.6 ]
|
|
|
|
mean value: 0.5977777777777777
|
|
|
|
key: train_recall
|
|
value: [0.75581395 0.72093023 0.75581395 0.75581395 0.70930233 0.72941176
|
|
0.72941176 0.72941176 0.70588235 0.72941176]
|
|
|
|
mean value: 0.7321203830369357
|
|
|
|
key: test_roc_auc
|
|
value: [0.52777778 0.63333333 0.78888889 0.51111111 0.67777778 0.73888889
|
|
0.69444444 0.73333333 0.63333333 0.57777778]
|
|
|
|
mean value: 0.6516666666666667
|
|
|
|
key: train_roc_auc
|
|
value: [0.80731874 0.77811218 0.78378933 0.80731874 0.78994528 0.78331053
|
|
0.8123803 0.76005472 0.78898769 0.78912449]
|
|
|
|
mean value: 0.7900341997264022
|
|
|
|
key: test_jcc
|
|
value: [0.35714286 0.46153846 0.63636364 0.18181818 0.45454545 0.58333333
|
|
0.45454545 0.61538462 0.46153846 0.42857143]
|
|
|
|
mean value: 0.4634781884781885
|
|
|
|
key: train_jcc
|
|
value: [0.66326531 0.62 0.6372549 0.66326531 0.62886598 0.62626263
|
|
0.65957447 0.60194175 0.625 0.63265306]
|
|
|
|
mean value: 0.6358083396732164
|
|
|
|
MCC on Blind test: 0.26
|
|
|
|
Accuracy on Blind test: 0.75
|
|
|
|
Model_name: K-Nearest Neighbors
|
|
Model func: KNeighborsClassifier()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', KNeighborsClassifier())])
|
|
|
|
key: fit_time
|
|
value: [0.00904465 0.00897193 0.008564 0.00962663 0.00982475 0.00981307
|
|
0.00969338 0.00960326 0.00960946 0.01008272]
|
|
|
|
mean value: 0.00948338508605957
|
|
|
|
key: score_time
|
|
value: [0.01033974 0.00971007 0.00979686 0.01071978 0.01066327 0.01075411
|
|
0.01082349 0.0102222 0.01070738 0.01111317]
|
|
|
|
mean value: 0.010485005378723145
|
|
|
|
key: test_mcc
|
|
value: [-0.16854997 0.48934516 0.4719399 0.03580574 0.4719399 0.04494666
|
|
0.26666667 0.78888889 0.26666667 0.28752732]
|
|
|
|
mean value: 0.2955176945574065
|
|
|
|
key: train_mcc
|
|
value: [0.56355773 0.51424001 0.55822989 0.5872471 0.46880023 0.54660374
|
|
0.57166923 0.4696969 0.53051107 0.56272457]
|
|
|
|
mean value: 0.5373280467801085
|
|
|
|
key: test_accuracy
|
|
value: [0.42105263 0.73684211 0.73684211 0.52631579 0.73684211 0.52631579
|
|
0.63157895 0.89473684 0.63157895 0.63157895]
|
|
|
|
mean value: 0.6473684210526316
|
|
|
|
key: train_accuracy
|
|
value: [0.77777778 0.75438596 0.77777778 0.78947368 0.73099415 0.77192982
|
|
0.78362573 0.73099415 0.76023392 0.77777778]
|
|
|
|
mean value: 0.7654970760233918
|
|
|
|
key: test_fscore
|
|
value: [0.35294118 0.66666667 0.70588235 0.4 0.70588235 0.57142857
|
|
0.63157895 0.9 0.63157895 0.58823529]
|
|
|
|
mean value: 0.6154194309302669
|
|
|
|
key: train_fscore
|
|
value: [0.75949367 0.7375 0.76829268 0.7721519 0.70886076 0.75776398
|
|
0.7672956 0.7012987 0.73202614 0.75641026]
|
|
|
|
mean value: 0.7461093686180117
|
|
|
|
key: test_precision
|
|
value: [0.375 0.83333333 0.75 0.5 0.75 0.54545455
|
|
0.66666667 0.9 0.66666667 0.71428571]
|
|
|
|
mean value: 0.6701406926406926
|
|
|
|
key: train_precision
|
|
value: [0.83333333 0.7972973 0.80769231 0.84722222 0.77777778 0.80263158
|
|
0.82432432 0.7826087 0.82352941 0.83098592]
|
|
|
|
mean value: 0.8127402864504468
|
|
|
|
key: test_recall
|
|
value: [0.33333333 0.55555556 0.66666667 0.33333333 0.66666667 0.6
|
|
0.6 0.9 0.6 0.5 ]
|
|
|
|
mean value: 0.5755555555555556
|
|
|
|
key: train_recall
|
|
value: [0.69767442 0.68604651 0.73255814 0.70930233 0.65116279 0.71764706
|
|
0.71764706 0.63529412 0.65882353 0.69411765]
|
|
|
|
mean value: 0.6900273597811217
|
|
|
|
key: test_roc_auc
|
|
value: [0.41666667 0.72777778 0.73333333 0.51666667 0.73333333 0.52222222
|
|
0.63333333 0.89444444 0.63333333 0.63888889]
|
|
|
|
mean value: 0.645
|
|
|
|
key: train_roc_auc
|
|
value: [0.77824897 0.75478796 0.77804378 0.78994528 0.73146375 0.77161423
|
|
0.78324213 0.73043776 0.75964432 0.77729138]
|
|
|
|
mean value: 0.7654719562243502
|
|
|
|
key: test_jcc
|
|
value: [0.21428571 0.5 0.54545455 0.25 0.54545455 0.4
|
|
0.46153846 0.81818182 0.46153846 0.41666667]
|
|
|
|
mean value: 0.4613120213120213
|
|
|
|
key: train_jcc
|
|
value: [0.6122449 0.58415842 0.62376238 0.62886598 0.54901961 0.61
|
|
0.62244898 0.54 0.57731959 0.60824742]
|
|
|
|
mean value: 0.5956067267164087
|
|
|
|
MCC on Blind test: 0.16
|
|
|
|
Accuracy on Blind test: 0.7
|
|
|
|
Model_name: SVM
|
|
Model func: SVC(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SVC(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01331568 0.01262593 0.01111817 0.01121378 0.01135492 0.01098537
|
|
0.01094937 0.01100969 0.01094222 0.01237082]
|
|
|
|
mean value: 0.011588597297668457
|
|
|
|
key: score_time
|
|
value: [0.01058674 0.00929952 0.00931764 0.00944519 0.00937819 0.00932002
|
|
0.00939965 0.00942421 0.00946426 0.00933266]
|
|
|
|
mean value: 0.009496808052062988
|
|
|
|
key: test_mcc
|
|
value: [0.57777778 0.78888889 0.59554321 0.48934516 0.68543653 0.78888889
|
|
0.36666667 0.80507649 0.80507649 0.38204659]
|
|
|
|
mean value: 0.6284746685357325
|
|
|
|
key: train_mcc
|
|
value: [0.82509929 0.80260349 0.78971132 0.84850151 0.77829485 0.81369939
|
|
0.8382529 0.81479279 0.8055213 0.82459982]
|
|
|
|
mean value: 0.8141076662995883
|
|
|
|
key: test_accuracy
|
|
value: [0.78947368 0.89473684 0.78947368 0.73684211 0.84210526 0.89473684
|
|
0.68421053 0.89473684 0.89473684 0.68421053]
|
|
|
|
mean value: 0.8105263157894737
|
|
|
|
key: train_accuracy
|
|
value: [0.9122807 0.9005848 0.89473684 0.92397661 0.88888889 0.90643275
|
|
0.91812865 0.90643275 0.9005848 0.9122807 ]
|
|
|
|
mean value: 0.9064327485380117
|
|
|
|
key: test_fscore
|
|
value: [0.77777778 0.88888889 0.8 0.66666667 0.82352941 0.9
|
|
0.7 0.90909091 0.90909091 0.66666667]
|
|
|
|
mean value: 0.8041711229946524
|
|
|
|
key: train_fscore
|
|
value: [0.9112426 0.89820359 0.89411765 0.92307692 0.88757396 0.90361446
|
|
0.91463415 0.90243902 0.89440994 0.9112426 ]
|
|
|
|
mean value: 0.9040554900998983
|
|
|
|
key: test_precision
|
|
value: [0.77777778 0.88888889 0.72727273 0.83333333 0.875 0.9
|
|
0.7 0.83333333 0.83333333 0.75 ]
|
|
|
|
mean value: 0.8118939393939394
|
|
|
|
key: train_precision
|
|
value: [0.92771084 0.92592593 0.9047619 0.93975904 0.90361446 0.92592593
|
|
0.94936709 0.93670886 0.94736842 0.91666667]
|
|
|
|
mean value: 0.9277809131049541
|
|
|
|
key: test_recall
|
|
value: [0.77777778 0.88888889 0.88888889 0.55555556 0.77777778 0.9
|
|
0.7 1. 1. 0.6 ]
|
|
|
|
mean value: 0.8088888888888889
|
|
|
|
key: train_recall
|
|
value: [0.89534884 0.87209302 0.88372093 0.90697674 0.87209302 0.88235294
|
|
0.88235294 0.87058824 0.84705882 0.90588235]
|
|
|
|
mean value: 0.8818467852257181
|
|
|
|
key: test_roc_auc
|
|
value: [0.78888889 0.89444444 0.79444444 0.72777778 0.83888889 0.89444444
|
|
0.68333333 0.88888889 0.88888889 0.68888889]
|
|
|
|
mean value: 0.8088888888888889
|
|
|
|
key: train_roc_auc
|
|
value: [0.9123803 0.90075239 0.89480164 0.92407661 0.88898769 0.90629275
|
|
0.91792066 0.90622435 0.9002736 0.9122435 ]
|
|
|
|
mean value: 0.9063953488372093
|
|
|
|
key: test_jcc
|
|
value: [0.63636364 0.8 0.66666667 0.5 0.7 0.81818182
|
|
0.53846154 0.83333333 0.83333333 0.5 ]
|
|
|
|
mean value: 0.6826340326340327
|
|
|
|
key: train_jcc
|
|
value: [0.83695652 0.81521739 0.80851064 0.85714286 0.79787234 0.82417582
|
|
0.84269663 0.82222222 0.80898876 0.83695652]
|
|
|
|
mean value: 0.8250739710305344
|
|
|
|
MCC on Blind test: 0.29
|
|
|
|
Accuracy on Blind test: 0.79
|
|
|
|
Model_name: MLP
|
|
Model func: MLPClassifier(max_iter=500, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MLPClassifier(max_iter=500, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.66450095 0.82026672 0.6703105 0.65101886 0.7942102 0.69545007
|
|
0.67278552 0.74140048 0.67537236 0.69364905]
|
|
|
|
mean value: 0.7078964710235596
|
|
|
|
key: score_time
|
|
value: [0.01510215 0.01506615 0.01508236 0.01213789 0.01506519 0.0123105
|
|
0.01212645 0.02084422 0.02217388 0.01866794]
|
|
|
|
mean value: 0.015857672691345213
|
|
|
|
key: test_mcc
|
|
value: [0.36666667 0.36666667 0.78888889 0.48934516 0.71611487 0.68888889
|
|
0.36803496 0.68543653 0.68543653 0.47777778]
|
|
|
|
mean value: 0.5633256945513115
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.68421053 0.68421053 0.89473684 0.73684211 0.84210526 0.84210526
|
|
0.68421053 0.84210526 0.84210526 0.73684211]
|
|
|
|
mean value: 0.7789473684210526
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.66666667 0.66666667 0.88888889 0.66666667 0.8 0.84210526
|
|
0.72727273 0.85714286 0.85714286 0.73684211]
|
|
|
|
mean value: 0.7709394698868383
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.66666667 0.66666667 0.88888889 0.83333333 1. 0.88888889
|
|
0.66666667 0.81818182 0.81818182 0.77777778]
|
|
|
|
mean value: 0.8025252525252525
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.66666667 0.66666667 0.88888889 0.55555556 0.66666667 0.8
|
|
0.8 0.9 0.9 0.7 ]
|
|
|
|
mean value: 0.7544444444444445
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.68333333 0.68333333 0.89444444 0.72777778 0.83333333 0.84444444
|
|
0.67777778 0.83888889 0.83888889 0.73888889]
|
|
|
|
mean value: 0.7761111111111111
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.5 0.5 0.8 0.5 0.66666667 0.72727273
|
|
0.57142857 0.75 0.75 0.58333333]
|
|
|
|
mean value: 0.6348701298701299
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.23
|
|
|
|
Accuracy on Blind test: 0.7
|
|
|
|
Model_name: Decision Tree
|
|
Model func: DecisionTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', DecisionTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01778102 0.0147562 0.01479435 0.01400495 0.01337886 0.01345778
|
|
0.01352835 0.013412 0.01353049 0.01281428]
|
|
|
|
mean value: 0.014145827293395996
|
|
|
|
key: score_time
|
|
value: [0.01165104 0.00915432 0.00896955 0.00860596 0.00863862 0.00876713
|
|
0.00878072 0.00871062 0.0086937 0.00867486]
|
|
|
|
mean value: 0.009064650535583496
|
|
|
|
key: test_mcc
|
|
value: [0.9 1. 0.89893315 1. 0.80903983 0.68888889
|
|
0.58655573 0.78888889 0.89893315 0.68888889]
|
|
|
|
mean value: 0.8260128526965538
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.94736842 1. 0.94736842 1. 0.89473684 0.84210526
|
|
0.78947368 0.89473684 0.94736842 0.84210526]
|
|
|
|
mean value: 0.9105263157894736
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.94736842 1. 0.94117647 1. 0.9 0.84210526
|
|
0.81818182 0.9 0.95238095 0.84210526]
|
|
|
|
mean value: 0.9143318188519427
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.9 1. 1. 1. 0.81818182 0.88888889
|
|
0.75 0.9 0.90909091 0.88888889]
|
|
|
|
mean value: 0.9055050505050505
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 0.88888889 1. 1. 0.8
|
|
0.9 0.9 1. 0.8 ]
|
|
|
|
mean value: 0.9288888888888889
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.95 1. 0.94444444 1. 0.9 0.84444444
|
|
0.78333333 0.89444444 0.94444444 0.84444444]
|
|
|
|
mean value: 0.9105555555555556
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.9 1. 0.88888889 1. 0.81818182 0.72727273
|
|
0.69230769 0.81818182 0.90909091 0.72727273]
|
|
|
|
mean value: 0.8481196581196582
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: -0.01
|
|
|
|
Accuracy on Blind test: 0.29
|
|
|
|
Model_name: Extra Trees
|
|
Model func: ExtraTreesClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreesClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.09221125 0.09298205 0.09245825 0.09219337 0.09401751 0.09261203
|
|
0.09346962 0.09304667 0.09442568 0.0930686 ]
|
|
|
|
mean value: 0.09304850101470948
|
|
|
|
key: score_time
|
|
value: [0.01839948 0.01745415 0.01796627 0.01764512 0.01756287 0.01763606
|
|
0.01759124 0.01754975 0.01794505 0.01758528]
|
|
|
|
mean value: 0.0177335262298584
|
|
|
|
key: test_mcc
|
|
value: [0.36666667 0.78888889 0.57777778 0.48934516 0.68543653 0.58655573
|
|
0.38204659 0.89893315 0.89893315 0.47777778]
|
|
|
|
mean value: 0.6152361415947837
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.68421053 0.89473684 0.78947368 0.73684211 0.84210526 0.78947368
|
|
0.68421053 0.94736842 0.94736842 0.73684211]
|
|
|
|
mean value: 0.8052631578947368
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.66666667 0.88888889 0.77777778 0.66666667 0.82352941 0.81818182
|
|
0.66666667 0.95238095 0.95238095 0.73684211]
|
|
|
|
mean value: 0.7949981906638253
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.66666667 0.88888889 0.77777778 0.83333333 0.875 0.75
|
|
0.75 0.90909091 0.90909091 0.77777778]
|
|
|
|
mean value: 0.8137626262626263
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.66666667 0.88888889 0.77777778 0.55555556 0.77777778 0.9
|
|
0.6 1. 1. 0.7 ]
|
|
|
|
mean value: 0.7866666666666666
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.68333333 0.89444444 0.78888889 0.72777778 0.83888889 0.78333333
|
|
0.68888889 0.94444444 0.94444444 0.73888889]
|
|
|
|
mean value: 0.8033333333333333
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.5 0.8 0.63636364 0.5 0.7 0.69230769
|
|
0.5 0.90909091 0.90909091 0.58333333]
|
|
|
|
mean value: 0.673018648018648
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.24
|
|
|
|
Accuracy on Blind test: 0.74
|
|
|
|
Model_name: Extra Tree
|
|
Model func: ExtraTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.00990391 0.01023483 0.0104568 0.01004553 0.01015067 0.0101018
|
|
0.00999546 0.00995278 0.01025414 0.01000476]
|
|
|
|
mean value: 0.010110068321228027
|
|
|
|
key: score_time
|
|
value: [0.01014376 0.00980282 0.00971484 0.00983143 0.008708 0.00875425
|
|
0.00949526 0.00877333 0.00906682 0.00943732]
|
|
|
|
mean value: 0.009372782707214356
|
|
|
|
key: test_mcc
|
|
value: [ 0.05555556 0.36803496 0.06900656 0.25844328 0.36803496 0.1495142
|
|
-0.1495142 0.4719399 0.15118579 0.58655573]
|
|
|
|
mean value: 0.23287567404348397
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.52631579 0.68421053 0.52631579 0.63157895 0.68421053 0.57894737
|
|
0.42105263 0.73684211 0.57894737 0.78947368]
|
|
|
|
mean value: 0.6157894736842106
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.52631579 0.625 0.57142857 0.58823529 0.625 0.63636364
|
|
0.35294118 0.76190476 0.66666667 0.81818182]
|
|
|
|
mean value: 0.6172037714607375
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.5 0.71428571 0.5 0.625 0.71428571 0.58333333
|
|
0.42857143 0.72727273 0.57142857 0.75 ]
|
|
|
|
mean value: 0.6114177489177489
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.55555556 0.55555556 0.66666667 0.55555556 0.55555556 0.7
|
|
0.3 0.8 0.8 0.9 ]
|
|
|
|
mean value: 0.638888888888889
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.52777778 0.67777778 0.53333333 0.62777778 0.67777778 0.57222222
|
|
0.42777778 0.73333333 0.56666667 0.78333333]
|
|
|
|
mean value: 0.6127777777777778
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.35714286 0.45454545 0.4 0.41666667 0.45454545 0.46666667
|
|
0.21428571 0.61538462 0.5 0.69230769]
|
|
|
|
mean value: 0.45715451215451214
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.07
|
|
|
|
Accuracy on Blind test: 0.61
|
|
|
|
Model_name: Random Forest
|
|
Model func: RandomForestClassifier(n_estimators=1000, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(n_estimators=1000, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.24602675 1.24929047 1.26128054 1.30116081 1.24489021 1.29069042
|
|
1.24034953 1.26967573 1.27572656 1.30667663]
|
|
|
|
mean value: 1.2685767650604247
|
|
|
|
key: score_time
|
|
value: [0.09894919 0.0977664 0.09855866 0.09779406 0.09801531 0.09780979
|
|
0.09785795 0.09849072 0.09950256 0.0991888 ]
|
|
|
|
mean value: 0.09839334487915039
|
|
|
|
key: test_mcc
|
|
value: [0.9 1. 0.9 0.89893315 1. 0.89893315
|
|
0.68888889 0.89893315 0.89893315 0.59554321]
|
|
|
|
mean value: 0.8680164700535378
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.94736842 1. 0.94736842 0.94736842 1. 0.94736842
|
|
0.84210526 0.94736842 0.94736842 0.78947368]
|
|
|
|
mean value: 0.9315789473684211
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.94736842 1. 0.94736842 0.94117647 1. 0.95238095
|
|
0.84210526 0.95238095 0.95238095 0.77777778]
|
|
|
|
mean value: 0.9312939210772028
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.9 1. 0.9 1. 1. 0.90909091
|
|
0.88888889 0.90909091 0.90909091 0.875 ]
|
|
|
|
mean value: 0.9291161616161616
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 0.88888889 1. 1.
|
|
0.8 1. 1. 0.7 ]
|
|
|
|
mean value: 0.9388888888888889
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.95 1. 0.95 0.94444444 1. 0.94444444
|
|
0.84444444 0.94444444 0.94444444 0.79444444]
|
|
|
|
mean value: 0.9316666666666666
|
|
|
|
key: train_roc_auc
|
|
value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.9 1. 0.9 0.88888889 1. 0.90909091
|
|
0.72727273 0.90909091 0.90909091 0.63636364]
|
|
|
|
mean value: 0.877979797979798
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.34
|
|
|
|
Accuracy on Blind test: 0.81
|
|
|
|
Model_name: Random Forest2
|
|
Model func: RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000...05', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10,
|
|
oob_score=True, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.86340189 0.87309051 0.92426157 0.83879042 0.85211325 0.87614226
|
|
0.94574165 0.88018322 0.87638187 0.88157725]
|
|
|
|
mean value: 0.8811683893203736
|
|
|
|
key: score_time
|
|
value: [0.23261523 0.22359085 0.21976709 0.22133017 0.22522712 0.20595551
|
|
0.25735569 0.22519827 0.19468021 0.11871862]
|
|
|
|
mean value: 0.21244387626647948
|
|
|
|
key: test_mcc
|
|
value: [0.9 1. 0.89893315 0.89893315 0.89893315 1.
|
|
0.68888889 0.89893315 0.89893315 0.41773368]
|
|
|
|
mean value: 0.850128831516684
|
|
|
|
key: train_mcc
|
|
value: [0.9649747 0.95346936 0.94157888 0.95346936 0.94157888 0.95348202
|
|
0.94158687 0.95321477 0.95321477 0.95348202]
|
|
|
|
mean value: 0.9510051629591505
|
|
|
|
key: test_accuracy
|
|
value: [0.94736842 1. 0.94736842 0.94736842 0.94736842 1.
|
|
0.84210526 0.94736842 0.94736842 0.68421053]
|
|
|
|
mean value: 0.9210526315789473
|
|
|
|
key: train_accuracy
|
|
value: [0.98245614 0.97660819 0.97076023 0.97660819 0.97076023 0.97660819
|
|
0.97076023 0.97660819 0.97660819 0.97660819]
|
|
|
|
mean value: 0.975438596491228
|
|
|
|
key: test_fscore
|
|
value: [0.94736842 1. 0.94117647 0.94117647 0.94117647 1.
|
|
0.84210526 0.95238095 0.95238095 0.625 ]
|
|
|
|
mean value: 0.9142765000737136
|
|
|
|
key: train_fscore
|
|
value: [0.98265896 0.97701149 0.97109827 0.97701149 0.97109827 0.97674419
|
|
0.97076023 0.97647059 0.97647059 0.97674419]
|
|
|
|
mean value: 0.9756068262316967
|
|
|
|
key: test_precision
|
|
value: [0.9 1. 1. 1. 1. 1.
|
|
0.88888889 0.90909091 0.90909091 0.83333333]
|
|
|
|
mean value: 0.944040404040404
|
|
|
|
key: train_precision
|
|
value: [0.97701149 0.96590909 0.96551724 0.96590909 0.96551724 0.96551724
|
|
0.96511628 0.97647059 0.97647059 0.96551724]
|
|
|
|
mean value: 0.9688956097128653
|
|
|
|
key: test_recall
|
|
value: [1. 1. 0.88888889 0.88888889 0.88888889 1.
|
|
0.8 1. 1. 0.5 ]
|
|
|
|
mean value: 0.8966666666666666
|
|
|
|
key: train_recall
|
|
value: [0.98837209 0.98837209 0.97674419 0.98837209 0.97674419 0.98823529
|
|
0.97647059 0.97647059 0.97647059 0.98823529]
|
|
|
|
mean value: 0.9824487004103967
|
|
|
|
key: test_roc_auc
|
|
value: [0.95 1. 0.94444444 0.94444444 0.94444444 1.
|
|
0.84444444 0.94444444 0.94444444 0.69444444]
|
|
|
|
mean value: 0.9211111111111111
|
|
|
|
key: train_roc_auc
|
|
value: [0.98242134 0.97653899 0.97072503 0.97653899 0.97072503 0.97667579
|
|
0.97079343 0.97660739 0.97660739 0.97667579]
|
|
|
|
mean value: 0.9754309165526677
|
|
|
|
key: test_jcc
|
|
value: [0.9 1. 0.88888889 0.88888889 0.88888889 1.
|
|
0.72727273 0.90909091 0.90909091 0.45454545]
|
|
|
|
mean value: 0.8566666666666667
|
|
|
|
key: train_jcc
|
|
value: [0.96590909 0.95505618 0.94382022 0.95505618 0.94382022 0.95454545
|
|
0.94318182 0.95402299 0.95402299 0.95454545]
|
|
|
|
mean value: 0.9523980604182076
|
|
|
|
MCC on Blind test: 0.33
|
|
|
|
Accuracy on Blind test: 0.82
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.02470326 0.01024437 0.0102489 0.01061344 0.01052642 0.01032305
|
|
0.01032853 0.01031137 0.01031613 0.01032352]
|
|
|
|
mean value: 0.011793899536132812
|
|
|
|
key: score_time
|
|
value: [0.01329374 0.00965357 0.00974584 0.009866 0.0098691 0.00955677
|
|
0.00957489 0.00953984 0.009583 0.00955963]
|
|
|
|
mean value: 0.010024237632751464
|
|
|
|
key: test_mcc
|
|
value: [0.05555556 0.26666667 0.57777778 0.02721655 0.36803496 0.47777778
|
|
0.41773368 0.4719399 0.26666667 0.15555556]
|
|
|
|
mean value: 0.3084925097927452
|
|
|
|
key: train_mcc
|
|
value: [0.61768697 0.55968039 0.5683567 0.61768697 0.5872471 0.5701417
|
|
0.63393269 0.52117999 0.58646061 0.58264941]
|
|
|
|
mean value: 0.5845022535735144
|
|
|
|
key: test_accuracy
|
|
value: [0.52631579 0.63157895 0.78947368 0.52631579 0.68421053 0.73684211
|
|
0.68421053 0.73684211 0.63157895 0.57894737]
|
|
|
|
mean value: 0.6526315789473685
|
|
|
|
key: train_accuracy
|
|
value: [0.80701754 0.77777778 0.78362573 0.80701754 0.78947368 0.78362573
|
|
0.8128655 0.76023392 0.78947368 0.78947368]
|
|
|
|
mean value: 0.7900584795321637
|
|
|
|
key: test_fscore
|
|
value: [0.52631579 0.63157895 0.77777778 0.30769231 0.625 0.73684211
|
|
0.625 0.76190476 0.63157895 0.6 ]
|
|
|
|
mean value: 0.6223690636848531
|
|
|
|
key: train_fscore
|
|
value: [0.79754601 0.7654321 0.77844311 0.79754601 0.7721519 0.77018634
|
|
0.79487179 0.75151515 0.76923077 0.775 ]
|
|
|
|
mean value: 0.7771923186833384
|
|
|
|
key: test_precision
|
|
value: [0.5 0.6 0.77777778 0.5 0.71428571 0.77777778
|
|
0.83333333 0.72727273 0.66666667 0.6 ]
|
|
|
|
mean value: 0.6697113997113997
|
|
|
|
key: train_precision
|
|
value: [0.84415584 0.81578947 0.80246914 0.84415584 0.84722222 0.81578947
|
|
0.87323944 0.775 0.84507042 0.82666667]
|
|
|
|
mean value: 0.8289558519526397
|
|
|
|
key: test_recall
|
|
value: [0.55555556 0.66666667 0.77777778 0.22222222 0.55555556 0.7
|
|
0.5 0.8 0.6 0.6 ]
|
|
|
|
mean value: 0.5977777777777777
|
|
|
|
key: train_recall
|
|
value: [0.75581395 0.72093023 0.75581395 0.75581395 0.70930233 0.72941176
|
|
0.72941176 0.72941176 0.70588235 0.72941176]
|
|
|
|
mean value: 0.7321203830369357
|
|
|
|
key: test_roc_auc
|
|
value: [0.52777778 0.63333333 0.78888889 0.51111111 0.67777778 0.73888889
|
|
0.69444444 0.73333333 0.63333333 0.57777778]
|
|
|
|
mean value: 0.6516666666666667
|
|
|
|
key: train_roc_auc
|
|
value: [0.80731874 0.77811218 0.78378933 0.80731874 0.78994528 0.78331053
|
|
0.8123803 0.76005472 0.78898769 0.78912449]
|
|
|
|
mean value: 0.7900341997264022
|
|
|
|
key: test_jcc
|
|
value: [0.35714286 0.46153846 0.63636364 0.18181818 0.45454545 0.58333333
|
|
0.45454545 0.61538462 0.46153846 0.42857143]
|
|
|
|
mean value: 0.4634781884781885
|
|
|
|
key: train_jcc
|
|
value: [0.66326531 0.62 0.6372549 0.66326531 0.62886598 0.62626263
|
|
0.65957447 0.60194175 0.625 0.63265306]
|
|
|
|
mean value: 0.6358083396732164
|
|
|
|
MCC on Blind test: 0.26
|
|
|
|
Accuracy on Blind test: 0.75
|
|
|
|
Model_name: XGBoost
|
|
Model func: XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000...
|
|
interaction_constraints=None, learning_rate=None,
|
|
max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan,
|
|
monotone_constraints=None, n_estimators=100,
|
|
n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None,
|
|
reg_lambda=None, scale_pos_weight=None,
|
|
subsample=None, tree_method=None,
|
|
use_label_encoder=False,
|
|
validate_parameters=None, verbosity=0))])
|
|
|
|
key: fit_time
|
|
value: [0.07475376 0.05589986 0.05632401 0.05659246 0.05435586 0.06486392
|
|
0.05442691 0.05984616 0.05690098 0.05387878]
|
|
|
|
mean value: 0.05878427028656006
|
|
|
|
key: score_time
|
|
value: [0.01059294 0.0105722 0.0108428 0.01123548 0.01082087 0.01124358
|
|
0.01102734 0.01148081 0.01094484 0.01126599]
|
|
|
|
mean value: 0.011002683639526367
|
|
|
|
key: test_mcc
|
|
value: [0.9 1. 0.9 1. 0.9 0.78888889
|
|
0.9 0.89893315 0.89893315 0.68888889]
|
|
|
|
mean value: 0.8875644077679756
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.94736842 1. 0.94736842 1. 0.94736842 0.89473684
|
|
0.94736842 0.94736842 0.94736842 0.84210526]
|
|
|
|
mean value: 0.9421052631578947
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.94736842 1. 0.94736842 1. 0.94736842 0.9
|
|
0.94736842 0.95238095 0.95238095 0.84210526]
|
|
|
|
mean value: 0.9436340852130326
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.9 1. 0.9 1. 0.9 0.9
|
|
1. 0.90909091 0.90909091 0.88888889]
|
|
|
|
mean value: 0.9307070707070707
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 1. 0.9 0.9 1. 1. 0.8]
|
|
|
|
mean value: 0.96
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.95 1. 0.95 1. 0.95 0.89444444
|
|
0.95 0.94444444 0.94444444 0.84444444]
|
|
|
|
mean value: 0.9427777777777777
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.9 1. 0.9 1. 0.9 0.81818182
|
|
0.9 0.90909091 0.90909091 0.72727273]
|
|
|
|
mean value: 0.8963636363636364
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.27
|
|
|
|
Accuracy on Blind test: 0.84
|
|
|
|
Model_name: LDA
|
|
Model func: LinearDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LinearDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.03021121 0.05368328 0.04535365 0.05584192 0.05373716 0.05315685
|
|
0.05115438 0.05387235 0.05361891 0.07130933]
|
|
|
|
mean value: 0.05219390392303467
|
|
|
|
key: score_time
|
|
value: [0.0217042 0.02236819 0.02124619 0.02397776 0.01195145 0.01713133
|
|
0.021698 0.02353668 0.02183437 0.03351045]
|
|
|
|
mean value: 0.021895861625671385
|
|
|
|
key: test_mcc
|
|
value: [0.78888889 0.78888889 0.48934516 0.71611487 0.57777778 0.4719399
|
|
0.59554321 0.59554321 0.80903983 0.80903983]
|
|
|
|
mean value: 0.6642121590843045
|
|
|
|
key: train_mcc
|
|
value: [0.97660739 0.96497948 0.96497948 0.96497948 0.96497948 0.97660739
|
|
0.98837209 0.98837051 0.9649747 0.98837209]
|
|
|
|
mean value: 0.974322209012793
|
|
|
|
key: test_accuracy
|
|
value: [0.89473684 0.89473684 0.73684211 0.84210526 0.78947368 0.73684211
|
|
0.78947368 0.78947368 0.89473684 0.89473684]
|
|
|
|
mean value: 0.8263157894736842
|
|
|
|
key: train_accuracy
|
|
value: [0.98830409 0.98245614 0.98245614 0.98245614 0.98245614 0.98830409
|
|
0.99415205 0.99415205 0.98245614 0.99415205]
|
|
|
|
mean value: 0.9871345029239765
|
|
|
|
key: test_fscore
|
|
value: [0.88888889 0.88888889 0.66666667 0.8 0.77777778 0.76190476
|
|
0.77777778 0.77777778 0.88888889 0.88888889]
|
|
|
|
mean value: 0.8117460317460318
|
|
|
|
key: train_fscore
|
|
value: [0.98837209 0.98245614 0.98245614 0.98245614 0.98245614 0.98823529
|
|
0.99415205 0.99408284 0.98224852 0.99415205]
|
|
|
|
mean value: 0.9871067403058408
|
|
|
|
key: test_precision
|
|
value: [0.88888889 0.88888889 0.83333333 1. 0.77777778 0.72727273
|
|
0.875 0.875 1. 1. ]
|
|
|
|
mean value: 0.8866161616161616
|
|
|
|
key: train_precision
|
|
value: [0.98837209 0.98823529 0.98823529 0.98823529 0.98823529 0.98823529
|
|
0.98837209 1. 0.98809524 0.98837209]
|
|
|
|
mean value: 0.9894387987753241
|
|
|
|
key: test_recall
|
|
value: [0.88888889 0.88888889 0.55555556 0.66666667 0.77777778 0.8
|
|
0.7 0.7 0.8 0.8 ]
|
|
|
|
mean value: 0.7577777777777778
|
|
|
|
key: train_recall
|
|
value: [0.98837209 0.97674419 0.97674419 0.97674419 0.97674419 0.98823529
|
|
1. 0.98823529 0.97647059 1. ]
|
|
|
|
mean value: 0.984829001367989
|
|
|
|
key: test_roc_auc
|
|
value: [0.89444444 0.89444444 0.72777778 0.83333333 0.78888889 0.73333333
|
|
0.79444444 0.79444444 0.9 0.9 ]
|
|
|
|
mean value: 0.8261111111111111
|
|
|
|
key: train_roc_auc
|
|
value: [0.98830369 0.98248974 0.98248974 0.98248974 0.98248974 0.98830369
|
|
0.99418605 0.99411765 0.98242134 0.99418605]
|
|
|
|
mean value: 0.9871477428180575
|
|
|
|
key: test_jcc
|
|
value: [0.8 0.8 0.5 0.66666667 0.63636364 0.61538462
|
|
0.63636364 0.63636364 0.8 0.8 ]
|
|
|
|
mean value: 0.6891142191142191
|
|
|
|
key: train_jcc
|
|
value: [0.97701149 0.96551724 0.96551724 0.96551724 0.96551724 0.97674419
|
|
0.98837209 0.98823529 0.96511628 0.98837209]
|
|
|
|
mean value: 0.9745920405050553
|
|
|
|
MCC on Blind test: 0.17
|
|
|
|
Accuracy on Blind test: 0.69
|
|
|
|
Model_name: Multinomial
|
|
Model func: MultinomialNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MultinomialNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01129794 0.00918961 0.00925589 0.00931072 0.00889683 0.00914097
|
|
0.00898027 0.00971103 0.00915551 0.00909138]
|
|
|
|
mean value: 0.009403014183044433
|
|
|
|
key: score_time
|
|
value: [0.00905991 0.00906253 0.00890875 0.00907946 0.00857306 0.00863171
|
|
0.00874209 0.00880027 0.00887442 0.00877547]
|
|
|
|
mean value: 0.008850765228271485
|
|
|
|
key: test_mcc
|
|
value: [0.47777778 0.4719399 0.47777778 0.54433105 0.57777778 0.89893315
|
|
0.50604808 0.58655573 0.80507649 0.05555556]
|
|
|
|
mean value: 0.540177328470744
|
|
|
|
key: train_mcc
|
|
value: [0.64911081 0.62571819 0.64933644 0.59069767 0.62571819 0.60254481
|
|
0.61447932 0.59069767 0.59150228 0.67273225]
|
|
|
|
mean value: 0.6212537649466758
|
|
|
|
key: test_accuracy
|
|
value: [0.73684211 0.73684211 0.73684211 0.73684211 0.78947368 0.94736842
|
|
0.73684211 0.78947368 0.89473684 0.52631579]
|
|
|
|
mean value: 0.763157894736842
|
|
|
|
key: train_accuracy
|
|
value: [0.8245614 0.8128655 0.8245614 0.79532164 0.8128655 0.80116959
|
|
0.80701754 0.79532164 0.79532164 0.83625731]
|
|
|
|
mean value: 0.8105263157894737
|
|
|
|
key: test_fscore
|
|
value: [0.73684211 0.70588235 0.73684211 0.61538462 0.77777778 0.95238095
|
|
0.70588235 0.81818182 0.90909091 0.52631579]
|
|
|
|
mean value: 0.7484580778698425
|
|
|
|
key: train_fscore
|
|
value: [0.8255814 0.81395349 0.82352941 0.79532164 0.81395349 0.80232558
|
|
0.80924855 0.79532164 0.78787879 0.8372093 ]
|
|
|
|
mean value: 0.8104323285224544
|
|
|
|
key: test_precision
|
|
value: [0.7 0.75 0.7 1. 0.77777778 0.90909091
|
|
0.85714286 0.75 0.83333333 0.55555556]
|
|
|
|
mean value: 0.7832900432900433
|
|
|
|
key: train_precision
|
|
value: [0.8255814 0.81395349 0.83333333 0.8 0.81395349 0.79310345
|
|
0.79545455 0.79069767 0.8125 0.82758621]
|
|
|
|
mean value: 0.810616358047192
|
|
|
|
key: test_recall
|
|
value: [0.77777778 0.66666667 0.77777778 0.44444444 0.77777778 1.
|
|
0.6 0.9 1. 0.5 ]
|
|
|
|
mean value: 0.7444444444444445
|
|
|
|
key: train_recall
|
|
value: [0.8255814 0.81395349 0.81395349 0.79069767 0.81395349 0.81176471
|
|
0.82352941 0.8 0.76470588 0.84705882]
|
|
|
|
mean value: 0.8105198358413133
|
|
|
|
key: test_roc_auc
|
|
value: [0.73888889 0.73333333 0.73888889 0.72222222 0.78888889 0.94444444
|
|
0.74444444 0.78333333 0.88888889 0.52777778]
|
|
|
|
mean value: 0.7611111111111111
|
|
|
|
key: train_roc_auc
|
|
value: [0.8245554 0.8128591 0.8246238 0.79534884 0.8128591 0.80123119
|
|
0.80711354 0.79534884 0.79514364 0.83632011]
|
|
|
|
mean value: 0.8105403556771547
|
|
|
|
key: test_jcc
|
|
value: [0.58333333 0.54545455 0.58333333 0.44444444 0.63636364 0.90909091
|
|
0.54545455 0.69230769 0.83333333 0.35714286]
|
|
|
|
mean value: 0.613025863025863
|
|
|
|
key: train_jcc
|
|
value: [0.7029703 0.68627451 0.7 0.66019417 0.68627451 0.66990291
|
|
0.67961165 0.66019417 0.65 0.72 ]
|
|
|
|
mean value: 0.6815422229258905
|
|
|
|
MCC on Blind test: 0.25
|
|
|
|
Accuracy on Blind test: 0.77
|
|
|
|
Model_name: Passive Aggresive
|
|
Model func: PassiveAggressiveClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
PassiveAggressiveClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01282477 0.01607466 0.01618195 0.01664495 0.01493382 0.01674604
|
|
0.01612163 0.01652718 0.01608014 0.01816034]
|
|
|
|
mean value: 0.016029548645019532
|
|
|
|
key: score_time
|
|
value: [0.0095787 0.01163363 0.0116837 0.01164746 0.01160502 0.01160955
|
|
0.01166129 0.01157117 0.01157427 0.01168036]
|
|
|
|
mean value: 0.011424517631530762
|
|
|
|
key: test_mcc
|
|
value: [0.68543653 0.58655573 0.72456884 0.68543653 0.62994079 1.
|
|
0.59554321 0.78888889 0.78888889 0.59554321]
|
|
|
|
mean value: 0.7080802606237283
|
|
|
|
key: train_mcc
|
|
value: [0.95348202 0.88526575 0.95346936 0.97660739 0.82414084 0.96497948
|
|
0.94157888 0.96497948 0.95321477 0.96497948]
|
|
|
|
mean value: 0.938269744871367
|
|
|
|
key: test_accuracy
|
|
value: [0.84210526 0.78947368 0.84210526 0.84210526 0.78947368 1.
|
|
0.78947368 0.89473684 0.89473684 0.78947368]
|
|
|
|
mean value: 0.8473684210526315
|
|
|
|
key: train_accuracy
|
|
value: [0.97660819 0.94152047 0.97660819 0.98830409 0.90643275 0.98245614
|
|
0.97076023 0.98245614 0.97660819 0.98245614]
|
|
|
|
mean value: 0.968421052631579
|
|
|
|
key: test_fscore
|
|
value: [0.82352941 0.75 0.85714286 0.82352941 0.71428571 1.
|
|
0.77777778 0.9 0.9 0.77777778]
|
|
|
|
mean value: 0.8324042950513538
|
|
|
|
key: train_fscore
|
|
value: [0.97647059 0.93975904 0.97701149 0.98837209 0.89873418 0.98245614
|
|
0.9704142 0.98245614 0.97647059 0.98245614]
|
|
|
|
mean value: 0.9674600599342549
|
|
|
|
key: test_precision
|
|
value: [0.875 0.85714286 0.75 0.875 1. 1.
|
|
0.875 0.9 0.9 0.875 ]
|
|
|
|
mean value: 0.8907142857142857
|
|
|
|
key: train_precision
|
|
value: [0.98809524 0.975 0.96590909 0.98837209 0.98611111 0.97674419
|
|
0.97619048 0.97674419 0.97647059 0.97674419]
|
|
|
|
mean value: 0.9786381155704001
|
|
|
|
key: test_recall
|
|
value: [0.77777778 0.66666667 1. 0.77777778 0.55555556 1.
|
|
0.7 0.9 0.9 0.7 ]
|
|
|
|
mean value: 0.7977777777777778
|
|
|
|
key: train_recall
|
|
value: [0.96511628 0.90697674 0.98837209 0.98837209 0.8255814 0.98823529
|
|
0.96470588 0.98823529 0.97647059 0.98823529]
|
|
|
|
mean value: 0.9580300957592339
|
|
|
|
key: test_roc_auc
|
|
value: [0.83888889 0.78333333 0.85 0.83888889 0.77777778 1.
|
|
0.79444444 0.89444444 0.89444444 0.79444444]
|
|
|
|
mean value: 0.8466666666666667
|
|
|
|
key: train_roc_auc
|
|
value: [0.97667579 0.94172367 0.97653899 0.98830369 0.90690834 0.98248974
|
|
0.97072503 0.98248974 0.97660739 0.98248974]
|
|
|
|
mean value: 0.9684952120383037
|
|
|
|
key: test_jcc
|
|
value: [0.7 0.6 0.75 0.7 0.55555556 1.
|
|
0.63636364 0.81818182 0.81818182 0.63636364]
|
|
|
|
mean value: 0.7214646464646465
|
|
|
|
key: train_jcc
|
|
value: [0.95402299 0.88636364 0.95505618 0.97701149 0.81609195 0.96551724
|
|
0.94252874 0.96551724 0.95402299 0.96551724]
|
|
|
|
mean value: 0.9381649701196388
|
|
|
|
MCC on Blind test: 0.23
|
|
|
|
Accuracy on Blind test: 0.72
|
|
|
|
Model_name: Stochastic GDescent
|
|
Model func: SGDClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SGDClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01434088 0.01442695 0.0138278 0.01403832 0.01457047 0.01390123
|
|
0.01327515 0.01421165 0.01355481 0.0140357 ]
|
|
|
|
mean value: 0.01401829719543457
|
|
|
|
key: score_time
|
|
value: [0.01172566 0.01158524 0.01162529 0.011554 0.01161218 0.01162124
|
|
0.01161218 0.01158285 0.01156425 0.01154423]
|
|
|
|
mean value: 0.01160271167755127
|
|
|
|
key: test_mcc
|
|
value: [0.57777778 0.72456884 1. 0.72456884 0.78888889 0.2236068
|
|
0.48989795 0.78888889 0.62994079 0.68888889]
|
|
|
|
mean value: 0.6637027653718715
|
|
|
|
key: train_mcc
|
|
value: [0.9300862 0.95346936 0.94158687 0.53765754 0.9649747 0.13441438
|
|
0.38650754 0.96497948 0.74471077 0.96497948]
|
|
|
|
mean value: 0.7523366318438298
|
|
|
|
key: test_accuracy
|
|
value: [0.78947368 0.84210526 1. 0.84210526 0.89473684 0.52631579
|
|
0.68421053 0.89473684 0.78947368 0.84210526]
|
|
|
|
mean value: 0.8105263157894737
|
|
|
|
key: train_accuracy
|
|
value: [0.96491228 0.97660819 0.97076023 0.7251462 0.98245614 0.52046784
|
|
0.63157895 0.98245614 0.85964912 0.98245614]
|
|
|
|
mean value: 0.8596491228070176
|
|
|
|
key: test_fscore
|
|
value: [0.77777778 0.85714286 1. 0.85714286 0.88888889 0.18181818
|
|
0.57142857 0.9 0.83333333 0.84210526]
|
|
|
|
mean value: 0.7709637730690362
|
|
|
|
key: train_fscore
|
|
value: [0.96470588 0.97701149 0.97076023 0.78538813 0.98265896 0.06818182
|
|
0.41121495 0.98245614 0.875 0.98245614]
|
|
|
|
mean value: 0.7999833750069998
|
|
|
|
key: test_precision
|
|
value: [0.77777778 0.75 1. 0.75 0.88888889 1.
|
|
1. 0.9 0.71428571 0.88888889]
|
|
|
|
mean value: 0.866984126984127
|
|
|
|
key: train_precision
|
|
value: [0.97619048 0.96590909 0.97647059 0.64661654 0.97701149 1.
|
|
1. 0.97674419 0.78504673 0.97674419]
|
|
|
|
mean value: 0.9280733292006104
|
|
|
|
key: test_recall
|
|
value: [0.77777778 1. 1. 1. 0.88888889 0.1
|
|
0.4 0.9 1. 0.8 ]
|
|
|
|
mean value: 0.7866666666666666
|
|
|
|
key: train_recall
|
|
value: [0.95348837 0.98837209 0.96511628 1. 0.98837209 0.03529412
|
|
0.25882353 0.98823529 0.98823529 0.98823529]
|
|
|
|
mean value: 0.8154172366621067
|
|
|
|
key: test_roc_auc
|
|
value: [0.78888889 0.85 1. 0.85 0.89444444 0.55
|
|
0.7 0.89444444 0.77777778 0.84444444]
|
|
|
|
mean value: 0.815
|
|
|
|
key: train_roc_auc
|
|
value: [0.96497948 0.97653899 0.97079343 0.72352941 0.98242134 0.51764706
|
|
0.62941176 0.98248974 0.86039672 0.98248974]
|
|
|
|
mean value: 0.8590697674418605
|
|
|
|
key: test_jcc
|
|
value: [0.63636364 0.75 1. 0.75 0.8 0.1
|
|
0.4 0.81818182 0.71428571 0.72727273]
|
|
|
|
mean value: 0.6696103896103897
|
|
|
|
key: train_jcc
|
|
value: [0.93181818 0.95505618 0.94318182 0.64661654 0.96590909 0.03529412
|
|
0.25882353 0.96551724 0.77777778 0.96551724]
|
|
|
|
mean value: 0.7445511719632978
|
|
|
|
MCC on Blind test: 0.25
|
|
|
|
Accuracy on Blind test: 0.81
|
|
|
|
Model_name: AdaBoost Classifier
|
|
Model func: AdaBoostClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', AdaBoostClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.12807393 0.11261487 0.10838461 0.11364746 0.11253858 0.1072166
|
|
0.10396695 0.104146 0.10384583 0.10411072]
|
|
|
|
mean value: 0.10985455513000489
|
|
|
|
key: score_time
|
|
value: [0.01615238 0.01558518 0.01613688 0.01571083 0.01624513 0.01467013
|
|
0.0145421 0.01479316 0.01455259 0.01447678]
|
|
|
|
mean value: 0.015286517143249512
|
|
|
|
key: test_mcc
|
|
value: [0.78888889 1. 1. 1. 0.9 0.9
|
|
0.80903983 0.89893315 0.89893315 0.68888889]
|
|
|
|
mean value: 0.8884683912635647
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.89473684 1. 1. 1. 0.94736842 0.94736842
|
|
0.89473684 0.94736842 0.94736842 0.84210526]
|
|
|
|
mean value: 0.9421052631578947
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.88888889 1. 1. 1. 0.94736842 0.94736842
|
|
0.88888889 0.95238095 0.95238095 0.84210526]
|
|
|
|
mean value: 0.9419381787802841
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.88888889 1. 1. 1. 0.9 1.
|
|
1. 0.90909091 0.90909091 0.88888889]
|
|
|
|
mean value: 0.9495959595959595
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.88888889 1. 1. 1. 1. 0.9
|
|
0.8 1. 1. 0.8 ]
|
|
|
|
mean value: 0.9388888888888889
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.89444444 1. 1. 1. 0.95 0.95
|
|
0.9 0.94444444 0.94444444 0.84444444]
|
|
|
|
mean value: 0.9427777777777777
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.8 1. 1. 1. 0.9 0.9
|
|
0.8 0.90909091 0.90909091 0.72727273]
|
|
|
|
mean value: 0.8945454545454545
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.17
|
|
|
|
Accuracy on Blind test: 0.69
|
|
|
|
Model_name: Bagging Classifier
|
|
Model func: BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
BaggingClassifier(n_jobs=10, oob_score=True,
|
|
random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.03604388 0.03716707 0.03749108 0.03307009 0.0392766 0.05371833
|
|
0.04788876 0.03437924 0.0533731 0.05464911]
|
|
|
|
mean value: 0.042705726623535153
|
|
|
|
key: score_time
|
|
value: [0.02338982 0.02120996 0.02390671 0.02185345 0.02301741 0.03634286
|
|
0.02367401 0.01739883 0.03800941 0.03902411]
|
|
|
|
mean value: 0.026782655715942384
|
|
|
|
key: test_mcc
|
|
value: [0.9 1. 0.9 1. 1. 0.80903983
|
|
0.9 0.89893315 0.89893315 0.68888889]
|
|
|
|
mean value: 0.8995795023746759
|
|
|
|
key: train_mcc
|
|
value: [0.96497948 0.97687783 0.97687783 0.95321477 0.98837209 0.97687158
|
|
0.97660739 0.98837051 0.98837051 1. ]
|
|
|
|
mean value: 0.9790541994787398
|
|
|
|
key: test_accuracy
|
|
value: [0.94736842 1. 0.94736842 1. 1. 0.89473684
|
|
0.94736842 0.94736842 0.94736842 0.84210526]
|
|
|
|
mean value: 0.9473684210526315
|
|
|
|
key: train_accuracy
|
|
value: [0.98245614 0.98830409 0.98830409 0.97660819 0.99415205 0.98830409
|
|
0.98830409 0.99415205 0.99415205 1. ]
|
|
|
|
mean value: 0.9894736842105263
|
|
|
|
key: test_fscore
|
|
value: [0.94736842 1. 0.94736842 1. 1. 0.88888889
|
|
0.94736842 0.95238095 0.95238095 0.84210526]
|
|
|
|
mean value: 0.9477861319966583
|
|
|
|
key: train_fscore
|
|
value: [0.98245614 0.98823529 0.98823529 0.97674419 0.99415205 0.98809524
|
|
0.98823529 0.99408284 0.99408284 1. ]
|
|
|
|
mean value: 0.9894319174102566
|
|
|
|
key: test_precision
|
|
value: [0.9 1. 0.9 1. 1. 1.
|
|
1. 0.90909091 0.90909091 0.88888889]
|
|
|
|
mean value: 0.9507070707070707
|
|
|
|
key: train_precision
|
|
value: [0.98823529 1. 1. 0.97674419 1. 1.
|
|
0.98823529 1. 1. 1. ]
|
|
|
|
mean value: 0.9953214774281806
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 1. 0.8 0.9 1. 1. 0.8]
|
|
|
|
mean value: 0.95
|
|
|
|
key: train_recall
|
|
value: [0.97674419 0.97674419 0.97674419 0.97674419 0.98837209 0.97647059
|
|
0.98823529 0.98823529 0.98823529 1. ]
|
|
|
|
mean value: 0.9836525307797538
|
|
|
|
key: test_roc_auc
|
|
value: [0.95 1. 0.95 1. 1. 0.9
|
|
0.95 0.94444444 0.94444444 0.84444444]
|
|
|
|
mean value: 0.9483333333333334
|
|
|
|
key: train_roc_auc
|
|
value: [0.98248974 0.98837209 0.98837209 0.97660739 0.99418605 0.98823529
|
|
0.98830369 0.99411765 0.99411765 1. ]
|
|
|
|
mean value: 0.9894801641586868
|
|
|
|
key: test_jcc
|
|
value: [0.9 1. 0.9 1. 1. 0.8
|
|
0.9 0.90909091 0.90909091 0.72727273]
|
|
|
|
mean value: 0.9045454545454545
|
|
|
|
key: train_jcc
|
|
value: [0.96551724 0.97674419 0.97674419 0.95454545 0.98837209 0.97647059
|
|
0.97674419 0.98823529 0.98823529 1. ]
|
|
|
|
mean value: 0.9791608523558144
|
|
|
|
MCC on Blind test: 0.15
|
|
|
|
Accuracy on Blind test: 0.57
|
|
|
|
Model_name: Gaussian Process
|
|
Model func: GaussianProcessClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianProcessClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.04278493 0.05287099 0.07897472 0.04232192 0.05876231 0.05915022
|
|
0.05846882 0.05894804 0.05870676 0.04686022]
|
|
|
|
mean value: 0.05578489303588867
|
|
|
|
key: score_time
|
|
value: [0.02433133 0.02237487 0.03089428 0.02354026 0.02206278 0.0225935
|
|
0.02022529 0.02299881 0.02124119 0.02438021]
|
|
|
|
mean value: 0.023464250564575195
|
|
|
|
key: test_mcc
|
|
value: [0.15555556 0.68543653 0.4719399 0.39056329 0.36803496 0.05555556
|
|
0.26666667 0.78888889 0.36666667 0.41773368]
|
|
|
|
mean value: 0.39670416941770253
|
|
|
|
key: train_mcc
|
|
value: [0.98837209 0.98837209 0.98837209 1. 1. 1.
|
|
0.98837051 0.98837051 0.98837051 0.98837051]
|
|
|
|
mean value: 0.9918598324555198
|
|
|
|
key: test_accuracy
|
|
value: [0.57894737 0.84210526 0.73684211 0.68421053 0.68421053 0.52631579
|
|
0.63157895 0.89473684 0.68421053 0.68421053]
|
|
|
|
mean value: 0.6947368421052631
|
|
|
|
key: train_accuracy
|
|
value: [0.99415205 0.99415205 0.99415205 1. 1. 1.
|
|
0.99415205 0.99415205 0.99415205 0.99415205]
|
|
|
|
mean value: 0.995906432748538
|
|
|
|
key: test_fscore
|
|
value: [0.55555556 0.82352941 0.70588235 0.57142857 0.625 0.52631579
|
|
0.63157895 0.9 0.7 0.625 ]
|
|
|
|
mean value: 0.6664290628532115
|
|
|
|
key: train_fscore
|
|
value: [0.99415205 0.99415205 0.99415205 1. 1. 1.
|
|
0.99408284 0.99408284 0.99408284 0.99408284]
|
|
|
|
mean value: 0.9958787501297622
|
|
|
|
key: test_precision
|
|
value: [0.55555556 0.875 0.75 0.8 0.71428571 0.55555556
|
|
0.66666667 0.9 0.7 0.83333333]
|
|
|
|
mean value: 0.7350396825396825
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.55555556 0.77777778 0.66666667 0.44444444 0.55555556 0.5
|
|
0.6 0.9 0.7 0.5 ]
|
|
|
|
mean value: 0.62
|
|
|
|
key: train_recall
|
|
value: [0.98837209 0.98837209 0.98837209 1. 1. 1.
|
|
0.98823529 0.98823529 0.98823529 0.98823529]
|
|
|
|
mean value: 0.9918057455540356
|
|
|
|
key: test_roc_auc
|
|
value: [0.57777778 0.83888889 0.73333333 0.67222222 0.67777778 0.52777778
|
|
0.63333333 0.89444444 0.68333333 0.69444444]
|
|
|
|
mean value: 0.6933333333333334
|
|
|
|
key: train_roc_auc
|
|
value: [0.99418605 0.99418605 0.99418605 1. 1. 1.
|
|
0.99411765 0.99411765 0.99411765 0.99411765]
|
|
|
|
mean value: 0.9959028727770178
|
|
|
|
key: test_jcc
|
|
value: [0.38461538 0.7 0.54545455 0.4 0.45454545 0.35714286
|
|
0.46153846 0.81818182 0.53846154 0.45454545]
|
|
|
|
mean value: 0.5114485514485514
|
|
|
|
key: train_jcc
|
|
value: [0.98837209 0.98837209 0.98837209 1. 1. 1.
|
|
0.98823529 0.98823529 0.98823529 0.98823529]
|
|
|
|
mean value: 0.9918057455540356
|
|
|
|
MCC on Blind test: 0.19
|
|
|
|
Accuracy on Blind test: 0.72
|
|
|
|
Model_name: Gradient Boosting
|
|
Model func: GradientBoostingClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GradientBoostingClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.33094716 0.31259513 0.31917596 0.32095408 0.31772399 0.31708431
|
|
0.31707764 0.31742764 0.32255292 0.33519816]
|
|
|
|
mean value: 0.32107369899749755
|
|
|
|
key: score_time
|
|
value: [0.0092268 0.00944781 0.00909162 0.00918269 0.00947618 0.00917506
|
|
0.00907254 0.0090673 0.00937724 0.00912404]
|
|
|
|
mean value: 0.009224128723144532
|
|
|
|
key: test_mcc
|
|
value: [0.9 1. 0.9 1. 0.9 0.68888889
|
|
0.9 0.89893315 0.89893315 0.68888889]
|
|
|
|
mean value: 0.8775644077679757
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.94736842 1. 0.94736842 1. 0.94736842 0.84210526
|
|
0.94736842 0.94736842 0.94736842 0.84210526]
|
|
|
|
mean value: 0.9368421052631578
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.94736842 1. 0.94736842 1. 0.94736842 0.84210526
|
|
0.94736842 0.95238095 0.95238095 0.84210526]
|
|
|
|
mean value: 0.937844611528822
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.9 1. 0.9 1. 0.9 0.88888889
|
|
1. 0.90909091 0.90909091 0.88888889]
|
|
|
|
mean value: 0.9295959595959595
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 1. 0.8 0.9 1. 1. 0.8]
|
|
|
|
mean value: 0.95
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.95 1. 0.95 1. 0.95 0.84444444
|
|
0.95 0.94444444 0.94444444 0.84444444]
|
|
|
|
mean value: 0.9377777777777778
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.9 1. 0.9 1. 0.9 0.72727273
|
|
0.9 0.90909091 0.90909091 0.72727273]
|
|
|
|
mean value: 0.8872727272727273
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.25
|
|
|
|
Accuracy on Blind test: 0.84
|
|
|
|
Model_name: QDA
|
|
Model func: QuadraticDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', QuadraticDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.01818037 0.01993227 0.03410816 0.02050376 0.02031946 0.02056813
|
|
0.02116418 0.02032781 0.02072144 0.0206151 ]
|
|
|
|
mean value: 0.021644067764282227
|
|
|
|
key: score_time
|
|
value: [0.01197982 0.01421547 0.0123024 0.01404023 0.01683712 0.01726198
|
|
0.01217818 0.02029681 0.02125478 0.02101254]
|
|
|
|
mean value: 0.0161379337310791
|
|
|
|
key: test_mcc
|
|
value: [-0.26666667 0.1495142 -0.05555556 0.26257545 0.15118579 0.25844328
|
|
-0.25844328 0.16854997 0.19096397 0.04494666]
|
|
|
|
mean value: 0.06455138148439796
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.36842105 0.57894737 0.47368421 0.63157895 0.57894737 0.63157895
|
|
0.36842105 0.57894737 0.57894737 0.52631579]
|
|
|
|
mean value: 0.531578947368421
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.33333333 0.5 0.44444444 0.53333333 0.42857143 0.66666667
|
|
0.33333333 0.55555556 0.5 0.57142857]
|
|
|
|
mean value: 0.48666666666666664
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.33333333 0.57142857 0.44444444 0.66666667 0.6 0.63636364
|
|
0.375 0.625 0.66666667 0.54545455]
|
|
|
|
mean value: 0.5464357864357864
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.33333333 0.44444444 0.44444444 0.44444444 0.33333333 0.7
|
|
0.3 0.5 0.4 0.6 ]
|
|
|
|
mean value: 0.44999999999999996
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.36666667 0.57222222 0.47222222 0.62222222 0.56666667 0.62777778
|
|
0.37222222 0.58333333 0.58888889 0.52222222]
|
|
|
|
mean value: 0.5294444444444444
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.2 0.33333333 0.28571429 0.36363636 0.27272727 0.5
|
|
0.2 0.38461538 0.33333333 0.4 ]
|
|
|
|
mean value: 0.32733599733599733
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.0
|
|
|
|
Accuracy on Blind test: 0.59
|
|
|
|
Model_name: Ridge Classifier
|
|
Model func: RidgeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02965856 0.03922558 0.04030275 0.03733325 0.03428888 0.03428817
|
|
0.03525424 0.03363705 0.03504062 0.03319383]
|
|
|
|
mean value: 0.03522229194641113
|
|
|
|
key: score_time
|
|
value: [0.02330279 0.02287483 0.02117157 0.02006721 0.02243257 0.01165771
|
|
0.02202344 0.02095985 0.02206254 0.01817369]
|
|
|
|
mean value: 0.020472621917724608
|
|
|
|
key: test_mcc
|
|
value: [0.78888889 0.89893315 1. 0.89893315 0.80507649 1.
|
|
0.59554321 0.78888889 0.78888889 0.59554321]
|
|
|
|
mean value: 0.816069587615312
|
|
|
|
key: train_mcc
|
|
value: [0.97660739 0.95321477 0.95321477 0.95321477 0.95321477 0.94157888
|
|
0.96497948 0.9649747 0.95321477 0.96497948]
|
|
|
|
mean value: 0.9579193800821753
|
|
|
|
key: test_accuracy
|
|
value: [0.89473684 0.94736842 1. 0.94736842 0.89473684 1.
|
|
0.78947368 0.89473684 0.89473684 0.78947368]
|
|
|
|
mean value: 0.9052631578947369
|
|
|
|
key: train_accuracy
|
|
value: [0.98830409 0.97660819 0.97660819 0.97660819 0.97660819 0.97076023
|
|
0.98245614 0.98245614 0.97660819 0.98245614]
|
|
|
|
mean value: 0.9789473684210526
|
|
|
|
key: test_fscore
|
|
value: [0.88888889 0.94117647 1. 0.94117647 0.875 1.
|
|
0.77777778 0.9 0.9 0.77777778]
|
|
|
|
mean value: 0.9001797385620915
|
|
|
|
key: train_fscore
|
|
value: [0.98837209 0.97674419 0.97674419 0.97674419 0.97674419 0.9704142
|
|
0.98245614 0.98224852 0.97647059 0.98245614]
|
|
|
|
mean value: 0.9789394428039842
|
|
|
|
key: test_precision
|
|
value: [0.88888889 1. 1. 1. 1. 1.
|
|
0.875 0.9 0.9 0.875 ]
|
|
|
|
mean value: 0.9438888888888889
|
|
|
|
key: train_precision
|
|
value: [0.98837209 0.97674419 0.97674419 0.97674419 0.97674419 0.97619048
|
|
0.97674419 0.98809524 0.97647059 0.97674419]
|
|
|
|
mean value: 0.9789593511823333
|
|
|
|
key: test_recall
|
|
value: [0.88888889 0.88888889 1. 0.88888889 0.77777778 1.
|
|
0.7 0.9 0.9 0.7 ]
|
|
|
|
mean value: 0.8644444444444445
|
|
|
|
key: train_recall
|
|
value: /home/tanu/git/LSHTM_analysis/scripts/ml/./embb_orig.py:175: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
rus_CT.sort_values(by = ['test_mcc'], ascending = False, inplace = True)
|
|
/home/tanu/git/LSHTM_analysis/scripts/ml/./embb_orig.py:178: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
rus_BT.sort_values(by = ['bts_mcc'], ascending = False, inplace = True)
|
|
[0.98837209 0.97674419 0.97674419 0.97674419 0.97674419 0.96470588
|
|
0.98823529 0.97647059 0.97647059 0.98823529]
|
|
|
|
mean value: 0.9789466484268126
|
|
|
|
key: test_roc_auc
|
|
value: [0.89444444 0.94444444 1. 0.94444444 0.88888889 1.
|
|
0.79444444 0.89444444 0.89444444 0.79444444]
|
|
|
|
mean value: 0.9049999999999999
|
|
|
|
key: train_roc_auc
|
|
value: [0.98830369 0.97660739 0.97660739 0.97660739 0.97660739 0.97072503
|
|
0.98248974 0.98242134 0.97660739 0.98248974]
|
|
|
|
mean value: 0.9789466484268126
|
|
|
|
key: test_jcc
|
|
value: [0.8 0.88888889 1. 0.88888889 0.77777778 1.
|
|
0.63636364 0.81818182 0.81818182 0.63636364]
|
|
|
|
mean value: 0.8264646464646465
|
|
|
|
key: train_jcc
|
|
value: [0.97701149 0.95454545 0.95454545 0.95454545 0.95454545 0.94252874
|
|
0.96551724 0.96511628 0.95402299 0.96551724]
|
|
|
|
mean value: 0.9587895798401012
|
|
|
|
MCC on Blind test: 0.26
|
|
|
|
Accuracy on Blind test: 0.8
|
|
|
|
Model_name: Ridge ClassifierCV
|
|
Model func: RidgeClassifierCV(cv=10)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifierCV(cv=10))])
|
|
|
|
key: fit_time
|
|
value: [0.22830915 0.13229847 0.2399621 0.2173996 0.22628093 0.22176266
|
|
0.10810757 0.17843628 0.17112684 0.15505385]
|
|
|
|
mean value: 0.1878737449645996
|
|
|
|
key: score_time
|
|
value: [0.02125955 0.02050495 0.02311754 0.02308416 0.02185082 0.01856279
|
|
0.01214767 0.02271938 0.01241326 0.02345276]
|
|
|
|
mean value: 0.01991128921508789
|
|
|
|
key: test_mcc
|
|
value: [0.78888889 0.89893315 1. 0.89893315 0.80507649 1.
|
|
0.47777778 0.78888889 0.78888889 0.59554321]
|
|
|
|
mean value: 0.8042930442088367
|
|
|
|
key: train_mcc
|
|
value: [0.97660739 0.95321477 0.95321477 0.95321477 0.95321477 0.94157888
|
|
0.97660739 0.9649747 0.95321477 0.96497948]
|
|
|
|
mean value: 0.9590821707798497
|
|
|
|
key: test_accuracy
|
|
value: [0.89473684 0.94736842 1. 0.94736842 0.89473684 1.
|
|
0.73684211 0.89473684 0.89473684 0.78947368]
|
|
|
|
mean value: 0.9
|
|
|
|
key: train_accuracy
|
|
value: [0.98830409 0.97660819 0.97660819 0.97660819 0.97660819 0.97076023
|
|
0.98830409 0.98245614 0.97660819 0.98245614]
|
|
|
|
mean value: 0.97953216374269
|
|
|
|
key: test_fscore
|
|
value: [0.88888889 0.94117647 1. 0.94117647 0.875 1.
|
|
0.73684211 0.9 0.9 0.77777778]
|
|
|
|
mean value: 0.8960861713106295
|
|
|
|
key: train_fscore
|
|
value: [0.98837209 0.97674419 0.97674419 0.97674419 0.97674419 0.9704142
|
|
0.98823529 0.98224852 0.97647059 0.98245614]
|
|
|
|
mean value: 0.9795173581806611
|
|
|
|
key: test_precision
|
|
value: [0.88888889 1. 1. 1. 1. 1.
|
|
0.77777778 0.9 0.9 0.875 ]
|
|
|
|
mean value: 0.9341666666666667
|
|
|
|
key: train_precision
|
|
value: [0.98837209 0.97674419 0.97674419 0.97674419 0.97674419 0.97619048
|
|
0.98823529 0.98809524 0.97647059 0.97674419]
|
|
|
|
mean value: 0.9801084619894469
|
|
|
|
key: test_recall
|
|
value: [0.88888889 0.88888889 1. 0.88888889 0.77777778 1.
|
|
0.7 0.9 0.9 0.7 ]
|
|
|
|
mean value: 0.8644444444444445
|
|
|
|
key: train_recall
|
|
value: [0.98837209 0.97674419 0.97674419 0.97674419 0.97674419 0.96470588
|
|
0.98823529 0.97647059 0.97647059 0.98823529]
|
|
|
|
mean value: 0.9789466484268126
|
|
|
|
key: test_roc_auc
|
|
value: [0.89444444 0.94444444 1. 0.94444444 0.88888889 1.
|
|
0.73888889 0.89444444 0.89444444 0.79444444]
|
|
|
|
mean value: 0.8994444444444444
|
|
|
|
key: train_roc_auc
|
|
value: [0.98830369 0.97660739 0.97660739 0.97660739 0.97660739 0.97072503
|
|
0.98830369 0.98242134 0.97660739 0.98248974]
|
|
|
|
mean value: 0.9795280437756497
|
|
|
|
key: test_jcc
|
|
value: [0.8 0.88888889 1. 0.88888889 0.77777778 1.
|
|
0.58333333 0.81818182 0.81818182 0.63636364]
|
|
|
|
mean value: 0.8211616161616162
|
|
|
|
key: train_jcc
|
|
value: [0.97701149 0.95454545 0.95454545 0.95454545 0.95454545 0.94252874
|
|
0.97674419 0.96511628 0.95402299 0.96551724]
|
|
|
|
mean value: 0.9599122743068212
|
|
|
|
MCC on Blind test: 0.26
|
|
|
|
Accuracy on Blind test: 0.8
|
|
|
|
Model_name: Logistic Regression
|
|
Model func: LogisticRegression(random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegression(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.03978562 0.03962374 0.03861666 0.03984666 0.03933811 0.03847075
|
|
0.03867912 0.0392909 0.03938913 0.03889823]
|
|
|
|
mean value: 0.03919389247894287
|
|
|
|
key: score_time
|
|
value: [0.01514792 0.01215363 0.01442266 0.01239681 0.01476765 0.01480198
|
|
0.01482081 0.01233411 0.01209879 0.01238394]
|
|
|
|
mean value: 0.01353282928466797
|
|
|
|
key: test_mcc
|
|
value: [0.72242312 0.91587302 0.91580648 0.86237318 0.85952381 0.94365079
|
|
0.94285714 0.97182532 0.8340361 0.82992752]
|
|
|
|
mean value: 0.8798296477481782
|
|
|
|
key: train_mcc
|
|
value: [0.92148635 0.91812906 0.915076 0.91812744 0.92448113 0.90877163
|
|
0.90573203 0.90566038 0.91211206 0.91840248]
|
|
|
|
mean value: 0.9147978560667899
|
|
|
|
key: test_accuracy
|
|
value: [0.85915493 0.95774648 0.95774648 0.92957746 0.92957746 0.97183099
|
|
0.97142857 0.98571429 0.91428571 0.91428571]
|
|
|
|
mean value: 0.9391348088531187
|
|
|
|
key: train_accuracy
|
|
value: [0.96062992 0.95905512 0.95748031 0.95905512 0.96220472 0.95433071
|
|
0.95283019 0.95283019 0.95597484 0.9591195 ]
|
|
|
|
mean value: 0.9573510622492943
|
|
|
|
key: test_fscore
|
|
value: [0.84848485 0.95774648 0.95652174 0.92753623 0.92957746 0.97222222
|
|
0.97142857 0.98591549 0.91891892 0.91666667]
|
|
|
|
mean value: 0.9385018635355439
|
|
|
|
key: train_fscore
|
|
value: [0.96025437 0.95899054 0.95721078 0.95886076 0.96190476 0.95389507
|
|
0.95253165 0.95283019 0.95555556 0.95873016]
|
|
|
|
mean value: 0.957076382631699
|
|
|
|
key: test_precision
|
|
value: [0.90322581 0.94444444 0.97058824 0.96969697 0.94285714 0.97222222
|
|
0.97142857 0.97222222 0.87179487 0.89189189]
|
|
|
|
mean value: 0.9410372378304067
|
|
|
|
key: train_precision
|
|
value: [0.97106109 0.96202532 0.96485623 0.96190476 0.96805112 0.96153846
|
|
0.95859873 0.95283019 0.96474359 0.96794872]
|
|
|
|
mean value: 0.9633558203875522
|
|
|
|
key: test_recall
|
|
value: [0.8 0.97142857 0.94285714 0.88888889 0.91666667 0.97222222
|
|
0.97142857 1. 0.97142857 0.94285714]
|
|
|
|
mean value: 0.9377777777777778
|
|
|
|
key: train_recall
|
|
value: [0.94968553 0.95597484 0.94968553 0.95583596 0.95583596 0.94637224
|
|
0.94654088 0.95283019 0.94654088 0.94968553]
|
|
|
|
mean value: 0.950898756026427
|
|
|
|
key: test_roc_auc
|
|
value: [0.85833333 0.95793651 0.95753968 0.93015873 0.9297619 0.9718254
|
|
0.97142857 0.98571429 0.91428571 0.91428571]
|
|
|
|
mean value: 0.9391269841269841
|
|
|
|
key: train_roc_auc
|
|
value: [0.96064718 0.95905998 0.95749261 0.95905006 0.96219471 0.9543182
|
|
0.95283019 0.95283019 0.95597484 0.9591195 ]
|
|
|
|
mean value: 0.9573517449358173
|
|
|
|
key: test_jcc
|
|
value: [0.73684211 0.91891892 0.91666667 0.86486486 0.86842105 0.94594595
|
|
0.94444444 0.97222222 0.85 0.84615385]
|
|
|
|
mean value: 0.8864480067111646
|
|
|
|
key: train_jcc
|
|
value: [0.9235474 0.92121212 0.91793313 0.92097264 0.9266055 0.9118541
|
|
0.90936556 0.90990991 0.91489362 0.92073171]
|
|
|
|
mean value: 0.9177025697990997
|
|
|
|
MCC on Blind test: 0.29
|
|
|
|
Accuracy on Blind test: 0.8
|
|
|
|
Model_name: Logistic RegressionCV
|
|
Model func: LogisticRegressionCV(random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegressionCV(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.86740947 1.08062315 1.05011129 1.05504608 1.08258605 1.51096869
|
|
1.35609174 1.35790992 1.6634028 1.0844686 ]
|
|
|
|
mean value: 1.2108617782592774
|
|
|
|
key: score_time
|
|
value: [0.01683784 0.01795936 0.01759243 0.01573634 0.01554418 0.01551008
|
|
0.02251482 0.01527715 0.0160327 0.01910043]
|
|
|
|
mean value: 0.01721053123474121
|
|
|
|
key: test_mcc
|
|
value: [0.91580648 0.97222222 0.94365079 0.88880092 0.91587302 0.94511009
|
|
0.91766294 0.94440028 0.80829038 0.94440028]
|
|
|
|
mean value: 0.9196217397623336
|
|
|
|
key: train_mcc
|
|
value: [0.9653592 0.97807345 0.98112159 0.96850386 0.9842961 0.9842961
|
|
0.9812097 1. 0.99061012 0.9812097 ]
|
|
|
|
mean value: 0.9814679839475534
|
|
|
|
key: test_accuracy
|
|
value: [0.95774648 0.98591549 0.97183099 0.94366197 0.95774648 0.97183099
|
|
0.95714286 0.97142857 0.9 0.97142857]
|
|
|
|
mean value: 0.9588732394366197
|
|
|
|
key: train_accuracy
|
|
value: [0.98267717 0.98897638 0.99055118 0.98425197 0.99212598 0.99212598
|
|
0.99056604 1. 0.99528302 0.99056604]
|
|
|
|
mean value: 0.9907123755756946
|
|
|
|
key: test_fscore
|
|
value: [0.95652174 0.98591549 0.97142857 0.94285714 0.95774648 0.97297297
|
|
0.95890411 0.97222222 0.90666667 0.97222222]
|
|
|
|
mean value: 0.959745761892026
|
|
|
|
key: train_fscore
|
|
value: [0.98267717 0.98907956 0.99059561 0.98422713 0.99215071 0.99215071
|
|
0.990625 1. 0.99530516 0.990625 ]
|
|
|
|
mean value: 0.9907436046351754
|
|
|
|
key: test_precision
|
|
value: [0.97058824 0.97222222 0.97142857 0.97058824 0.97142857 0.94736842
|
|
0.92105263 0.94594595 0.85 0.94594595]
|
|
|
|
mean value: 0.9466568780191071
|
|
|
|
key: train_precision
|
|
value: [0.98422713 0.98142415 0.9875 0.98422713 0.9875 0.9875
|
|
0.98447205 1. 0.99065421 0.98447205]
|
|
|
|
mean value: 0.9871976712268249
|
|
|
|
key: test_recall
|
|
value: [0.94285714 1. 0.97142857 0.91666667 0.94444444 1.
|
|
1. 1. 0.97142857 1. ]
|
|
|
|
mean value: 0.9746825396825397
|
|
|
|
key: train_recall
|
|
value: [0.98113208 0.99685535 0.99371069 0.98422713 0.99684543 0.99684543
|
|
0.99685535 1. 1. 0.99685535]
|
|
|
|
mean value: 0.9943326786104002
|
|
|
|
key: test_roc_auc
|
|
value: [0.95753968 0.98611111 0.9718254 0.94404762 0.95793651 0.97142857
|
|
0.95714286 0.97142857 0.9 0.97142857]
|
|
|
|
mean value: 0.9588888888888889
|
|
|
|
key: train_roc_auc
|
|
value: [0.9826796 0.98896395 0.9905462 0.98425193 0.9921334 0.9921334
|
|
0.99056604 1. 0.99528302 0.99056604]
|
|
|
|
mean value: 0.9907123583913656
|
|
|
|
key: test_jcc
|
|
value: [0.91666667 0.97222222 0.94444444 0.89189189 0.91891892 0.94736842
|
|
0.92105263 0.94594595 0.82926829 0.94594595]
|
|
|
|
mean value: 0.9233725381350542
|
|
|
|
key: train_jcc
|
|
value: [0.96594427 0.97839506 0.98136646 0.9689441 0.98442368 0.98442368
|
|
0.98142415 1. 0.99065421 0.98142415]
|
|
|
|
mean value: 0.9816999748026448
|
|
|
|
MCC on Blind test: 0.28
|
|
|
|
Accuracy on Blind test: 0.8
|
|
|
|
Model_name: Gaussian NB
|
|
Model func: GaussianNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01552057 0.01288319 0.01323485 0.01080656 0.01259613 0.01399469
|
|
0.01070237 0.01538777 0.01220226 0.01187205]
|
|
|
|
mean value: 0.012920045852661132
|
|
|
|
key: score_time
|
|
value: [0.01230264 0.01067305 0.00993371 0.00953341 0.0092442 0.01275945
|
|
0.00995159 0.01274395 0.00995421 0.00998306]
|
|
|
|
mean value: 0.010707926750183106
|
|
|
|
key: test_mcc
|
|
value: [0.48650597 0.6656213 0.71917468 0.63253275 0.66322499 0.38125508
|
|
0.80295507 0.600982 0.68599434 0.65714286]
|
|
|
|
mean value: 0.6295389037769116
|
|
|
|
key: train_mcc
|
|
value: [0.67409574 0.72986179 0.69036982 0.64504761 0.66535701 0.65809768
|
|
0.70279197 0.69978988 0.72722458 0.71210159]
|
|
|
|
mean value: 0.6904737672015906
|
|
|
|
key: test_accuracy
|
|
value: [0.73239437 0.83098592 0.85915493 0.8028169 0.83098592 0.69014085
|
|
0.9 0.8 0.84285714 0.82857143]
|
|
|
|
mean value: 0.8117907444668008
|
|
|
|
key: train_accuracy
|
|
value: [0.83622047 0.86456693 0.84409449 0.81574803 0.83149606 0.82834646
|
|
0.85062893 0.8490566 0.86320755 0.85534591]
|
|
|
|
mean value: 0.8438711434655574
|
|
|
|
key: test_fscore
|
|
value: [0.6779661 0.81818182 0.85294118 0.77419355 0.82857143 0.68571429
|
|
0.89552239 0.80555556 0.84057971 0.82857143]
|
|
|
|
mean value: 0.8007797441351746
|
|
|
|
key: train_fscore
|
|
value: [0.83061889 0.86173633 0.83797054 0.7943761 0.82372323 0.82218597
|
|
0.84552846 0.84364821 0.85990338 0.85064935]
|
|
|
|
mean value: 0.8370340461106509
|
|
|
|
key: test_precision
|
|
value: [0.83333333 0.87096774 0.87878788 0.92307692 0.85294118 0.70588235
|
|
0.9375 0.78378378 0.85294118 0.82857143]
|
|
|
|
mean value: 0.8467785795371184
|
|
|
|
key: train_precision
|
|
value: [0.86148649 0.88157895 0.87372014 0.8968254 0.86206897 0.85135135
|
|
0.87542088 0.875 0.88118812 0.87919463]
|
|
|
|
mean value: 0.8737834909172908
|
|
|
|
key: test_recall
|
|
value: [0.57142857 0.77142857 0.82857143 0.66666667 0.80555556 0.66666667
|
|
0.85714286 0.82857143 0.82857143 0.82857143]
|
|
|
|
mean value: 0.7653174603174603
|
|
|
|
key: train_recall
|
|
value: [0.80188679 0.8427673 0.80503145 0.71293375 0.78864353 0.79495268
|
|
0.81761006 0.81446541 0.83962264 0.82389937]
|
|
|
|
mean value: 0.8041812987322183
|
|
|
|
key: test_roc_auc
|
|
value: [0.73015873 0.83015873 0.85873016 0.8047619 0.83134921 0.69047619
|
|
0.9 0.8 0.84285714 0.82857143]
|
|
|
|
mean value: 0.8117063492063492
|
|
|
|
key: train_roc_auc
|
|
value: [0.83627463 0.86460131 0.8441561 0.81558637 0.83142868 0.82829395
|
|
0.85062893 0.8490566 0.86320755 0.85534591]
|
|
|
|
mean value: 0.8438580044838602
|
|
|
|
key: test_jcc
|
|
value: [0.51282051 0.69230769 0.74358974 0.63157895 0.70731707 0.52173913
|
|
0.81081081 0.6744186 0.725 0.70731707]
|
|
|
|
mean value: 0.672689958832459
|
|
|
|
key: train_jcc
|
|
value: [0.71030641 0.75706215 0.72112676 0.65889213 0.70028011 0.69806094
|
|
0.73239437 0.72957746 0.75423729 0.74011299]
|
|
|
|
mean value: 0.720205060976602
|
|
|
|
MCC on Blind test: 0.25
|
|
|
|
Accuracy on Blind test: 0.77
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01253724 0.01098156 0.01111984 0.0110774 0.01121974 0.01122856
|
|
0.01257539 0.01135492 0.01214981 0.01198936]
|
|
|
|
mean value: 0.011623382568359375
|
|
|
|
key: score_time
|
|
value: [0.00946879 0.00899601 0.00952578 0.00924349 0.00911999 0.0092001
|
|
0.00999022 0.00969362 0.00937867 0.00924826]
|
|
|
|
mean value: 0.009386491775512696
|
|
|
|
key: test_mcc
|
|
value: [0.43675542 0.49681589 0.55043703 0.40826065 0.63643777 0.49323927
|
|
0.71545476 0.71428571 0.57735027 0.6882472 ]
|
|
|
|
mean value: 0.5717283983804675
|
|
|
|
key: train_mcc
|
|
value: [0.62526417 0.61260242 0.60033763 0.60314862 0.60335591 0.60655953
|
|
0.61389088 0.6229496 0.61025604 0.61357474]
|
|
|
|
mean value: 0.6111939545798641
|
|
|
|
key: test_accuracy
|
|
value: [0.71830986 0.74647887 0.77464789 0.70422535 0.81690141 0.74647887
|
|
0.85714286 0.85714286 0.78571429 0.84285714]
|
|
|
|
mean value: 0.7849899396378269
|
|
|
|
key: train_accuracy
|
|
value: [0.81259843 0.80629921 0.8 0.8015748 0.8015748 0.80314961
|
|
0.80660377 0.81132075 0.80503145 0.80660377]
|
|
|
|
mean value: 0.8054756598821374
|
|
|
|
key: test_fscore
|
|
value: [0.70588235 0.75675676 0.77777778 0.71232877 0.8115942 0.75675676
|
|
0.86111111 0.85714286 0.8 0.84931507]
|
|
|
|
mean value: 0.7888665651001425
|
|
|
|
key: train_fscore
|
|
value: [0.81435257 0.80629921 0.80370943 0.80126183 0.80373832 0.80559876
|
|
0.81105991 0.81424149 0.80745342 0.80989181]
|
|
|
|
mean value: 0.8077606736470757
|
|
|
|
key: test_precision
|
|
value: [0.72727273 0.71794872 0.75675676 0.7027027 0.84848485 0.73684211
|
|
0.83783784 0.85714286 0.75 0.81578947]
|
|
|
|
mean value: 0.7750778027093816
|
|
|
|
key: train_precision
|
|
value: [0.80804954 0.80757098 0.79027356 0.80126183 0.79384615 0.79447853
|
|
0.79279279 0.80182927 0.79754601 0.79635258]
|
|
|
|
mean value: 0.7984001237801253
|
|
|
|
key: test_recall
|
|
value: [0.68571429 0.8 0.8 0.72222222 0.77777778 0.77777778
|
|
0.88571429 0.85714286 0.85714286 0.88571429]
|
|
|
|
mean value: 0.8049206349206349
|
|
|
|
key: train_recall
|
|
value: [0.82075472 0.80503145 0.81761006 0.80126183 0.81388013 0.8170347
|
|
0.83018868 0.82704403 0.81761006 0.82389937]
|
|
|
|
mean value: 0.8174315020931294
|
|
|
|
key: test_roc_auc
|
|
value: [0.71785714 0.74722222 0.775 0.70396825 0.81746032 0.74603175
|
|
0.85714286 0.85714286 0.78571429 0.84285714]
|
|
|
|
mean value: 0.7850396825396826
|
|
|
|
key: train_roc_auc
|
|
value: [0.81258556 0.80630121 0.79997222 0.80157431 0.80159415 0.80317144
|
|
0.80660377 0.81132075 0.80503145 0.80660377]
|
|
|
|
mean value: 0.805475864531873
|
|
|
|
key: test_jcc
|
|
value: [0.54545455 0.60869565 0.63636364 0.55319149 0.68292683 0.60869565
|
|
0.75609756 0.75 0.66666667 0.73809524]
|
|
|
|
mean value: 0.6546187270533518
|
|
|
|
key: train_jcc
|
|
value: [0.68684211 0.67546174 0.67183463 0.66842105 0.671875 0.67447917
|
|
0.68217054 0.68668407 0.67708333 0.68051948]
|
|
|
|
mean value: 0.6775371120904725
|
|
|
|
MCC on Blind test: 0.22
|
|
|
|
Accuracy on Blind test: 0.69
|
|
|
|
Model_name: K-Nearest Neighbors
|
|
Model func: KNeighborsClassifier()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', KNeighborsClassifier())])
|
|
|
|
key: fit_time
|
|
value: [0.01397753 0.01156974 0.01058364 0.01132965 0.01123643 0.01125622
|
|
0.01117158 0.01139212 0.011127 0.0111227 ]
|
|
|
|
mean value: 0.011476659774780273
|
|
|
|
key: score_time
|
|
value: [0.0376544 0.01348925 0.01403856 0.01416183 0.01403022 0.01358771
|
|
0.01368356 0.0139389 0.01882291 0.01830149]
|
|
|
|
mean value: 0.01717088222503662
|
|
|
|
key: test_mcc
|
|
value: [0.47545167 0.74940548 0.63412698 0.77460317 0.55201766 0.46507937
|
|
0.71899664 0.51449576 0.77651637 0.57166195]
|
|
|
|
mean value: 0.6232355043371502
|
|
|
|
key: train_mcc
|
|
value: [0.78471399 0.78007134 0.78508785 0.77380965 0.78521062 0.78013813
|
|
0.78263766 0.7781197 0.78780508 0.7732324 ]
|
|
|
|
mean value: 0.781082642136331
|
|
|
|
key: test_accuracy
|
|
value: [0.73239437 0.87323944 0.81690141 0.88732394 0.77464789 0.73239437
|
|
0.85714286 0.75714286 0.88571429 0.78571429]
|
|
|
|
mean value: 0.810261569416499
|
|
|
|
key: train_accuracy
|
|
value: [0.89133858 0.88818898 0.89133858 0.88503937 0.88976378 0.88818898
|
|
0.88836478 0.88836478 0.89150943 0.88522013]
|
|
|
|
mean value: 0.888731738721339
|
|
|
|
key: test_fscore
|
|
value: [0.75324675 0.87671233 0.81690141 0.88888889 0.78947368 0.73239437
|
|
0.86486486 0.76056338 0.89189189 0.78873239]
|
|
|
|
mean value: 0.8163669961165823
|
|
|
|
key: train_fscore
|
|
value: [0.8952959 0.89355322 0.89561271 0.89022556 0.89583333 0.89323308
|
|
0.89481481 0.89160305 0.89716841 0.88989442]
|
|
|
|
mean value: 0.8937234507160731
|
|
|
|
key: test_precision
|
|
value: [0.69047619 0.84210526 0.80555556 0.88888889 0.75 0.74285714
|
|
0.82051282 0.75 0.84615385 0.77777778]
|
|
|
|
mean value: 0.7914327485380117
|
|
|
|
key: train_precision
|
|
value: [0.86510264 0.85386819 0.86297376 0.85057471 0.84788732 0.85344828
|
|
0.84593838 0.86646884 0.85269122 0.85507246]
|
|
|
|
mean value: 0.8554025807499486
|
|
|
|
key: test_recall
|
|
value: [0.82857143 0.91428571 0.82857143 0.88888889 0.83333333 0.72222222
|
|
0.91428571 0.77142857 0.94285714 0.8 ]
|
|
|
|
mean value: 0.8444444444444444
|
|
|
|
key: train_recall
|
|
value: [0.92767296 0.93710692 0.93081761 0.93375394 0.94952681 0.93690852
|
|
0.94968553 0.91823899 0.94654088 0.92767296]
|
|
|
|
mean value: 0.9357925123504554
|
|
|
|
key: test_roc_auc
|
|
value: [0.73373016 0.87380952 0.81706349 0.88730159 0.77380952 0.73253968
|
|
0.85714286 0.75714286 0.88571429 0.78571429]
|
|
|
|
mean value: 0.8103968253968254
|
|
|
|
key: train_roc_auc
|
|
value: [0.89128127 0.88811182 0.89127631 0.88511597 0.88985775 0.88826558
|
|
0.88836478 0.88836478 0.89150943 0.88522013]
|
|
|
|
mean value: 0.8887367815407813
|
|
|
|
key: test_jcc
|
|
value: [0.60416667 0.7804878 0.69047619 0.8 0.65217391 0.57777778
|
|
0.76190476 0.61363636 0.80487805 0.65116279]
|
|
|
|
mean value: 0.693666431786145
|
|
|
|
key: train_jcc
|
|
value: [0.81043956 0.80758808 0.8109589 0.80216802 0.81132075 0.80706522
|
|
0.80965147 0.80440771 0.81351351 0.80163043]
|
|
|
|
mean value: 0.8078743670543986
|
|
|
|
MCC on Blind test: 0.22
|
|
|
|
Accuracy on Blind test: 0.71
|
|
|
|
Model_name: SVM
|
|
Model func: SVC(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SVC(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02836943 0.02841258 0.03047156 0.02790475 0.02816534 0.02844357
|
|
0.02933002 0.0288651 0.02778006 0.02823734]
|
|
|
|
mean value: 0.02859797477722168
|
|
|
|
key: score_time
|
|
value: [0.01366615 0.01373053 0.01391363 0.01363516 0.01351142 0.01377559
|
|
0.01364255 0.01361656 0.01374435 0.01371479]
|
|
|
|
mean value: 0.013695073127746583
|
|
|
|
key: test_mcc
|
|
value: [0.69219293 0.71961897 0.74662454 0.71917468 0.8594125 0.83214239
|
|
0.82992752 0.860309 0.7581754 0.80032673]
|
|
|
|
mean value: 0.7817904663664247
|
|
|
|
key: train_mcc
|
|
value: [0.8488829 0.85200397 0.82686947 0.84567387 0.87108203 0.8521804
|
|
0.83647799 0.83343636 0.84283398 0.85220547]
|
|
|
|
mean value: 0.8461646429835071
|
|
|
|
key: test_accuracy
|
|
value: [0.84507042 0.85915493 0.87323944 0.85915493 0.92957746 0.91549296
|
|
0.91428571 0.92857143 0.87142857 0.9 ]
|
|
|
|
mean value: 0.8895975855130784
|
|
|
|
key: train_accuracy
|
|
value: [0.92440945 0.92598425 0.91338583 0.92283465 0.93543307 0.92598425
|
|
0.91823899 0.91666667 0.92138365 0.92610063]
|
|
|
|
mean value: 0.9230421433169911
|
|
|
|
key: test_fscore
|
|
value: [0.8358209 0.86111111 0.86956522 0.86486486 0.93150685 0.91891892
|
|
0.91666667 0.93150685 0.88311688 0.89855072]
|
|
|
|
mean value: 0.8911628980859956
|
|
|
|
key: train_fscore
|
|
value: [0.925 0.92644757 0.91419657 0.92283465 0.93603744 0.92667707
|
|
0.91823899 0.91600634 0.92088608 0.92621664]
|
|
|
|
mean value: 0.9232541345753876
|
|
|
|
key: test_precision
|
|
value: [0.875 0.83783784 0.88235294 0.84210526 0.91891892 0.89473684
|
|
0.89189189 0.89473684 0.80952381 0.91176471]
|
|
|
|
mean value: 0.8758869052599703
|
|
|
|
key: train_precision
|
|
value: [0.91925466 0.92211838 0.90712074 0.92138365 0.92592593 0.91666667
|
|
0.91823899 0.92332268 0.92675159 0.92476489]
|
|
|
|
mean value: 0.920554818192837
|
|
|
|
key: test_recall
|
|
value: [0.8 0.88571429 0.85714286 0.88888889 0.94444444 0.94444444
|
|
0.94285714 0.97142857 0.97142857 0.88571429]
|
|
|
|
mean value: 0.9092063492063491
|
|
|
|
key: train_recall
|
|
value: [0.93081761 0.93081761 0.92138365 0.92429022 0.94637224 0.93690852
|
|
0.91823899 0.90880503 0.91509434 0.92767296]
|
|
|
|
mean value: 0.9260401166597226
|
|
|
|
key: test_roc_auc
|
|
value: [0.84444444 0.85952381 0.87301587 0.85873016 0.92936508 0.91507937
|
|
0.91428571 0.92857143 0.87142857 0.9 ]
|
|
|
|
mean value: 0.8894444444444444
|
|
|
|
key: train_roc_auc
|
|
value: [0.92439934 0.92597663 0.91337321 0.92283693 0.93545027 0.92600143
|
|
0.91823899 0.91666667 0.92138365 0.92610063]
|
|
|
|
mean value: 0.923042775231633
|
|
|
|
key: test_jcc
|
|
value: [0.71794872 0.75609756 0.76923077 0.76190476 0.87179487 0.85
|
|
0.84615385 0.87179487 0.79069767 0.81578947]
|
|
|
|
mean value: 0.8051412547906264
|
|
|
|
key: train_jcc
|
|
value: [0.86046512 0.86297376 0.84195402 0.85672515 0.8797654 0.86337209
|
|
0.84883721 0.84502924 0.85337243 0.8625731 ]
|
|
|
|
mean value: 0.8575067517818242
|
|
|
|
MCC on Blind test: 0.36
|
|
|
|
Accuracy on Blind test: 0.85
|
|
|
|
Model_name: MLP
|
|
Model func: MLPClassifier(max_iter=500, random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MLPClassifier(max_iter=500, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [2.52470732 2.5612905 2.15969515 2.56258512 2.21599984 2.2796092
|
|
2.30010676 2.8347733 2.63528991 2.01732135]
|
|
|
|
mean value: 2.4091378450393677
|
|
|
|
key: score_time
|
|
value: [0.01242948 0.01246071 0.012398 0.03180456 0.01239038 0.01241946
|
|
0.01239514 0.01470327 0.01980495 0.01323724]
|
|
|
|
mean value: 0.015404319763183594
|
|
|
|
key: test_mcc
|
|
value: [0.91885703 0.9451949 0.9451949 0.84273607 0.9451949 0.94365079
|
|
0.91766294 0.97182532 0.80829038 0.80295507]
|
|
|
|
mean value: 0.9041562282458467
|
|
|
|
key: train_mcc
|
|
value: [0.99685531 0.99685531 0.99059524 0.99372055 0.99685535 0.99055602
|
|
0.98744091 1. 0.99686027 0.98432053]
|
|
|
|
mean value: 0.993405950957083
|
|
|
|
key: test_accuracy
|
|
value: [0.95774648 0.97183099 0.97183099 0.91549296 0.97183099 0.97183099
|
|
0.95714286 0.98571429 0.9 0.9 ]
|
|
|
|
mean value: 0.9503420523138834
|
|
|
|
key: train_accuracy
|
|
value: [0.9984252 0.9984252 0.99527559 0.99685039 0.9984252 0.99527559
|
|
0.99371069 1. 0.99842767 0.99213836]
|
|
|
|
mean value: 0.9966953894914079
|
|
|
|
key: test_fscore
|
|
value: [0.95890411 0.97222222 0.97222222 0.92307692 0.97142857 0.97222222
|
|
0.95890411 0.98591549 0.90666667 0.90410959]
|
|
|
|
mean value: 0.9525672129015752
|
|
|
|
key: train_fscore
|
|
value: [0.99843014 0.99843014 0.99530516 0.99685535 0.9984252 0.99526066
|
|
0.99373041 1. 0.99843014 0.99217527]
|
|
|
|
mean value: 0.9967042475839479
|
|
|
|
key: test_precision
|
|
value: [0.92105263 0.94594595 0.94594595 0.85714286 1. 0.97222222
|
|
0.92105263 0.97222222 0.85 0.86842105]
|
|
|
|
mean value: 0.9254005509268667
|
|
|
|
key: train_precision
|
|
value: [0.9968652 0.9968652 0.99065421 0.99373041 0.99685535 0.99683544
|
|
0.990625 1. 0.9968652 0.98753894]
|
|
|
|
mean value: 0.9946834954176147
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 0.94444444 0.97222222
|
|
1. 1. 0.97142857 0.94285714]
|
|
|
|
mean value: 0.9830952380952381
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 0.99369085
|
|
0.99685535 1. 1. 0.99685535]
|
|
|
|
mean value: 0.9987401543558915
|
|
|
|
key: test_roc_auc
|
|
value: [0.95833333 0.97222222 0.97222222 0.91428571 0.97222222 0.9718254
|
|
0.95714286 0.98571429 0.9 0.9 ]
|
|
|
|
mean value: 0.9503968253968254
|
|
|
|
key: train_roc_auc
|
|
value: [0.99842271 0.99842271 0.99526814 0.99685535 0.99842767 0.9952731
|
|
0.99371069 1. 0.99842767 0.99213836]
|
|
|
|
mean value: 0.9966946411919926
|
|
|
|
key: test_jcc
|
|
value: [0.92105263 0.94594595 0.94594595 0.85714286 0.94444444 0.94594595
|
|
0.92105263 0.97222222 0.82926829 0.825 ]
|
|
|
|
mean value: 0.9108020917488183
|
|
|
|
key: train_jcc
|
|
value: [0.9968652 0.9968652 0.99065421 0.99373041 0.99685535 0.99056604
|
|
0.98753894 1. 0.9968652 0.98447205]
|
|
|
|
mean value: 0.9934412598563462
|
|
|
|
MCC on Blind test: 0.34
|
|
|
|
Accuracy on Blind test: 0.85
|
|
|
|
Model_name: Decision Tree
|
|
Model func: DecisionTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', DecisionTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.03410077 0.02766514 0.0230999 0.02508998 0.02438259 0.0267725
|
|
0.02709556 0.02268052 0.02555275 0.02668524]
|
|
|
|
mean value: 0.026312494277954103
|
|
|
|
key: score_time
|
|
value: [0.01193619 0.00938582 0.00908685 0.00905204 0.00900221 0.00966692
|
|
0.00914454 0.0091188 0.00910354 0.00893569]
|
|
|
|
mean value: 0.009443259239196778
|
|
|
|
key: test_mcc
|
|
value: [0.9451949 1. 0.94365079 0.94511009 0.9451949 0.97220047
|
|
0.97182532 0.94440028 0.8660254 0.94440028]
|
|
|
|
mean value: 0.9478002426318035
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.97183099 1. 0.97183099 0.97183099 0.97183099 0.98591549
|
|
0.98571429 0.97142857 0.92857143 0.97142857]
|
|
|
|
mean value: 0.9730382293762576
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.97222222 1. 0.97142857 0.97297297 0.97142857 0.98630137
|
|
0.98591549 0.97222222 0.93333333 0.97222222]
|
|
|
|
mean value: 0.9738046978650876
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.94594595 1. 0.97142857 0.94736842 1. 0.97297297
|
|
0.97222222 0.94594595 0.875 0.94594595]
|
|
|
|
mean value: 0.9576830025514236
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 0.97142857 1. 0.94444444 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9915873015873016
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.97222222 1. 0.9718254 0.97142857 0.97222222 0.98571429
|
|
0.98571429 0.97142857 0.92857143 0.97142857]
|
|
|
|
mean value: 0.9730555555555556
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.94594595 1. 0.94444444 0.94736842 0.94444444 0.97297297
|
|
0.97222222 0.94594595 0.875 0.94594595]
|
|
|
|
mean value: 0.9494290342974554
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.24
|
|
|
|
Accuracy on Blind test: 0.83
|
|
|
|
Model_name: Extra Trees
|
|
Model func: ExtraTreesClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreesClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.13252306 0.13552666 0.12923884 0.12894249 0.13015294 0.13013697
|
|
0.12967587 0.1288631 0.12696624 0.12958503]
|
|
|
|
mean value: 0.13016111850738527
|
|
|
|
key: score_time
|
|
value: [0.01883793 0.01967883 0.01804328 0.01832151 0.01855874 0.01826692
|
|
0.01828146 0.01809955 0.01825643 0.01824641]
|
|
|
|
mean value: 0.018459105491638185
|
|
|
|
key: test_mcc
|
|
value: [0.97222222 1. 0.94365079 1. 0.9451949 0.91587302
|
|
0.97182532 0.97182532 0.8340361 0.94285714]
|
|
|
|
mean value: 0.9497484800647252
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.98591549 1. 0.97183099 1. 0.97183099 0.95774648
|
|
0.98571429 0.98571429 0.91428571 0.97142857]
|
|
|
|
mean value: 0.974446680080483
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.98591549 1. 0.97142857 1. 0.97142857 0.95774648
|
|
0.98591549 0.98591549 0.91891892 0.97142857]
|
|
|
|
mean value: 0.9748697590951112
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.97222222 1. 0.97142857 1. 1. 0.97142857
|
|
0.97222222 0.97222222 0.87179487 0.97142857]
|
|
|
|
mean value: 0.9702747252747252
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 0.97142857 1. 0.94444444 0.94444444
|
|
1. 1. 0.97142857 0.97142857]
|
|
|
|
mean value: 0.9803174603174603
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.98611111 1. 0.9718254 1. 0.97222222 0.95793651
|
|
0.98571429 0.98571429 0.91428571 0.97142857]
|
|
|
|
mean value: 0.9745238095238096
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.97222222 1. 0.94444444 1. 0.94444444 0.91891892
|
|
0.97222222 0.97222222 0.85 0.94444444]
|
|
|
|
mean value: 0.9518918918918919
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.35
|
|
|
|
Accuracy on Blind test: 0.93
|
|
|
|
Model_name: Extra Tree
|
|
Model func: ExtraTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01104712 0.01110101 0.01102614 0.01074004 0.01092267 0.0111022
|
|
0.01075745 0.01085114 0.01091075 0.01093817]
|
|
|
|
mean value: 0.010939669609069825
|
|
|
|
key: score_time
|
|
value: [0.0091362 0.0090034 0.0089736 0.00901842 0.00891519 0.00885844
|
|
0.00898552 0.00896597 0.0089798 0.00884628]
|
|
|
|
mean value: 0.008968281745910644
|
|
|
|
key: test_mcc
|
|
value: [0.79566006 0.77991323 0.88880092 0.79446135 0.88730159 0.79446135
|
|
0.84102145 0.88571429 0.73370909 0.75055535]
|
|
|
|
mean value: 0.8151598663146028
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.88732394 0.88732394 0.94366197 0.88732394 0.94366197 0.88732394
|
|
0.91428571 0.94285714 0.85714286 0.87142857]
|
|
|
|
mean value: 0.9022334004024145
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.8974359 0.89189189 0.94444444 0.9 0.94444444 0.9
|
|
0.92105263 0.94285714 0.87179487 0.88 ]
|
|
|
|
mean value: 0.909392132444764
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.81395349 0.84615385 0.91891892 0.81818182 0.94444444 0.81818182
|
|
0.85365854 0.94285714 0.79069767 0.825 ]
|
|
|
|
mean value: 0.8572047688114053
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 0.94285714 0.97142857 1. 0.94444444 1.
|
|
1. 0.94285714 0.97142857 0.94285714]
|
|
|
|
mean value: 0.9715873015873016
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.88888889 0.88809524 0.94404762 0.88571429 0.94365079 0.88571429
|
|
0.91428571 0.94285714 0.85714286 0.87142857]
|
|
|
|
mean value: 0.9021825396825397
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.81395349 0.80487805 0.89473684 0.81818182 0.89473684 0.81818182
|
|
0.85365854 0.89189189 0.77272727 0.78571429]
|
|
|
|
mean value: 0.834866084464556
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.22
|
|
|
|
Accuracy on Blind test: 0.83
|
|
|
|
Model_name: Random Forest
|
|
Model func: RandomForestClassifier(n_estimators=1000, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(n_estimators=1000, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.89812279 1.93032074 1.87976289 1.89733982 1.9241519 1.95350766
|
|
2.11095142 1.92987728 1.9015305 1.87068248]
|
|
|
|
mean value: 1.9296247482299804
|
|
|
|
key: score_time
|
|
value: [0.09710026 0.10486388 0.09680367 0.09640336 0.10592008 0.10394478
|
|
0.10473537 0.10404921 0.10246229 0.09675407]
|
|
|
|
mean value: 0.10130369663238525
|
|
|
|
key: test_mcc
|
|
value: [0.97222222 1. 0.94365079 0.97220047 1. 0.97220047
|
|
0.97182532 0.97182532 0.8340361 0.91465912]
|
|
|
|
mean value: 0.9552619796692498
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.98591549 1. 0.97183099 0.98591549 1. 0.98591549
|
|
0.98571429 0.98571429 0.91428571 0.95714286]
|
|
|
|
mean value: 0.9772434607645876
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.98591549 1. 0.97142857 0.98630137 1. 0.98630137
|
|
0.98591549 0.98591549 0.91891892 0.95774648]
|
|
|
|
mean value: 0.9778443187819996
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.97222222 1. 0.97142857 0.97297297 1. 0.97297297
|
|
0.97222222 0.97222222 0.87179487 0.94444444]
|
|
|
|
mean value: 0.96502805002805
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 0.97142857 1. 1. 1.
|
|
1. 1. 0.97142857 0.97142857]
|
|
|
|
mean value: 0.9914285714285714
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.98611111 1. 0.9718254 0.98571429 1. 0.98571429
|
|
0.98571429 0.98571429 0.91428571 0.95714286]
|
|
|
|
mean value: 0.9772222222222222
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.97222222 1. 0.94444444 0.97297297 1. 0.97297297
|
|
0.97222222 0.97222222 0.85 0.91891892]
|
|
|
|
mean value: 0.9575975975975977
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.35
|
|
|
|
Accuracy on Blind test: 0.89
|
|
|
|
Model_name: Random Forest2
|
|
Model func: RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000...05', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10,
|
|
oob_score=True, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.9811151 0.99474549 1.02433538 1.03899741 1.04609704 1.05840588
|
|
1.00599098 0.9883976 0.99367881 0.98470497]
|
|
|
|
mean value: 1.0116468667984009
|
|
|
|
key: score_time
|
|
value: [0.25700164 0.28620291 0.24107289 0.24569464 0.22783852 0.24015284
|
|
0.28249407 0.23092079 0.17062545 0.29615259]
|
|
|
|
mean value: 0.2478156328201294
|
|
|
|
key: test_mcc
|
|
value: [0.97222222 1. 0.91580648 0.97220047 0.97222222 0.97220047
|
|
0.97182532 0.97182532 0.8340361 0.8871639 ]
|
|
|
|
mean value: 0.9469502489680096
|
|
|
|
key: train_mcc
|
|
value: [0.97818972 0.97818972 0.97807345 0.9812781 0.97497794 0.9812781
|
|
0.98130676 0.98130676 0.98439842 0.97822445]
|
|
|
|
mean value: 0.9797223427936104
|
|
|
|
key: test_accuracy
|
|
value: [0.98591549 1. 0.95774648 0.98591549 0.98591549 0.98591549
|
|
0.98571429 0.98571429 0.91428571 0.94285714]
|
|
|
|
mean value: 0.9729979879275654
|
|
|
|
key: train_accuracy
|
|
value: [0.98897638 0.98897638 0.98897638 0.99055118 0.98740157 0.99055118
|
|
0.99056604 0.99056604 0.99213836 0.98899371]
|
|
|
|
mean value: 0.9897697221809538
|
|
|
|
key: test_fscore
|
|
value: [0.98591549 1. 0.95652174 0.98630137 0.98591549 0.98630137
|
|
0.98591549 0.98591549 0.91891892 0.94444444]
|
|
|
|
mean value: 0.9736149814050812
|
|
|
|
key: train_fscore
|
|
value: [0.98911353 0.98911353 0.98907956 0.990625 0.9875 0.990625
|
|
0.99065421 0.99065421 0.99219969 0.98911353]
|
|
|
|
mean value: 0.9898678253364782
|
|
|
|
key: test_precision
|
|
value: [0.97222222 1. 0.97058824 0.97297297 1. 0.97297297
|
|
0.97222222 0.97222222 0.87179487 0.91891892]
|
|
|
|
mean value: 0.9623914638620521
|
|
|
|
key: train_precision
|
|
value: [0.97846154 0.97846154 0.98142415 0.98142415 0.97832817 0.98142415
|
|
0.98148148 0.98148148 0.98452012 0.97846154]
|
|
|
|
mean value: 0.9805468321381634
|
|
|
|
key: test_recall
|
|
value: [1. 1. 0.94285714 1. 0.97222222 1.
|
|
1. 1. 0.97142857 0.97142857]
|
|
|
|
mean value: 0.9857936507936508
|
|
|
|
key: train_recall
|
|
value: [1. 1. 0.99685535 1. 0.99684543 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9993700771779458
|
|
|
|
key: test_roc_auc
|
|
value: [0.98611111 1. 0.95753968 0.98571429 0.98611111 0.98571429
|
|
0.98571429 0.98571429 0.91428571 0.94285714]
|
|
|
|
mean value: 0.9729761904761904
|
|
|
|
key: train_roc_auc
|
|
value: [0.98895899 0.98895899 0.98896395 0.99056604 0.98741642 0.99056604
|
|
0.99056604 0.99056604 0.99213836 0.98899371]
|
|
|
|
mean value: 0.9897694581671727
|
|
|
|
key: test_jcc
|
|
value: [0.97222222 1. 0.91666667 0.97297297 0.97222222 0.97297297
|
|
0.97222222 0.97222222 0.85 0.89473684]
|
|
|
|
mean value: 0.9496238343606764
|
|
|
|
key: train_jcc
|
|
value: [0.97846154 0.97846154 0.97839506 0.98142415 0.97530864 0.98142415
|
|
0.98148148 0.98148148 0.98452012 0.97846154]
|
|
|
|
mean value: 0.9799419703103914
|
|
|
|
MCC on Blind test: 0.35
|
|
|
|
Accuracy on Blind test: 0.88
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.02487111 0.01109767 0.01111317 0.01194763 0.01112103 0.01127791
|
|
0.01101685 0.01099205 0.01097155 0.01110864]
|
|
|
|
mean value: 0.01255176067352295
|
|
|
|
key: score_time
|
|
value: [0.01018596 0.00928998 0.00939631 0.00924277 0.00931168 0.0092411
|
|
0.00993609 0.00935626 0.00928593 0.00925684]
|
|
|
|
mean value: 0.009450292587280274
|
|
|
|
key: test_mcc
|
|
value: [0.43675542 0.49681589 0.55043703 0.40826065 0.63643777 0.49323927
|
|
0.71545476 0.71428571 0.57735027 0.6882472 ]
|
|
|
|
mean value: 0.5717283983804675
|
|
|
|
key: train_mcc
|
|
value: [0.62526417 0.61260242 0.60033763 0.60314862 0.60335591 0.60655953
|
|
0.61389088 0.6229496 0.61025604 0.61357474]
|
|
|
|
mean value: 0.6111939545798641
|
|
|
|
key: test_accuracy
|
|
value: [0.71830986 0.74647887 0.77464789 0.70422535 0.81690141 0.74647887
|
|
0.85714286 0.85714286 0.78571429 0.84285714]
|
|
|
|
mean value: 0.7849899396378269
|
|
|
|
key: train_accuracy
|
|
value: [0.81259843 0.80629921 0.8 0.8015748 0.8015748 0.80314961
|
|
0.80660377 0.81132075 0.80503145 0.80660377]
|
|
|
|
mean value: 0.8054756598821374
|
|
|
|
key: test_fscore
|
|
value: [0.70588235 0.75675676 0.77777778 0.71232877 0.8115942 0.75675676
|
|
0.86111111 0.85714286 0.8 0.84931507]
|
|
|
|
mean value: 0.7888665651001425
|
|
|
|
key: train_fscore
|
|
value: [0.81435257 0.80629921 0.80370943 0.80126183 0.80373832 0.80559876
|
|
0.81105991 0.81424149 0.80745342 0.80989181]
|
|
|
|
mean value: 0.8077606736470757
|
|
|
|
key: test_precision
|
|
value: [0.72727273 0.71794872 0.75675676 0.7027027 0.84848485 0.73684211
|
|
0.83783784 0.85714286 0.75 0.81578947]
|
|
|
|
mean value: 0.7750778027093816
|
|
|
|
key: train_precision
|
|
value: [0.80804954 0.80757098 0.79027356 0.80126183 0.79384615 0.79447853
|
|
0.79279279 0.80182927 0.79754601 0.79635258]
|
|
|
|
mean value: 0.7984001237801253
|
|
|
|
key: test_recall
|
|
value: [0.68571429 0.8 0.8 0.72222222 0.77777778 0.77777778
|
|
0.88571429 0.85714286 0.85714286 0.88571429]
|
|
|
|
mean value: 0.8049206349206349
|
|
|
|
key: train_recall
|
|
value: [0.82075472 0.80503145 0.81761006 0.80126183 0.81388013 0.8170347
|
|
0.83018868 0.82704403 0.81761006 0.82389937]
|
|
|
|
mean value: 0.8174315020931294
|
|
|
|
key: test_roc_auc
|
|
value: [0.71785714 0.74722222 0.775 0.70396825 0.81746032 0.74603175
|
|
0.85714286 0.85714286 0.78571429 0.84285714]
|
|
|
|
mean value: 0.7850396825396826
|
|
|
|
key: train_roc_auc
|
|
value: [0.81258556 0.80630121 0.79997222 0.80157431 0.80159415 0.80317144
|
|
0.80660377 0.81132075 0.80503145 0.80660377]
|
|
|
|
mean value: 0.805475864531873
|
|
|
|
key: test_jcc
|
|
value: [0.54545455 0.60869565 0.63636364 0.55319149 0.68292683 0.60869565
|
|
0.75609756 0.75 0.66666667 0.73809524]
|
|
|
|
mean value: 0.6546187270533518
|
|
|
|
key: train_jcc
|
|
value: [0.68684211 0.67546174 0.67183463 0.66842105 0.671875 0.67447917
|
|
0.68217054 0.68668407 0.67708333 0.68051948]
|
|
|
|
mean value: 0.6775371120904725
|
|
|
|
MCC on Blind test: 0.22
|
|
|
|
Accuracy on Blind test: 0.69
|
|
|
|
Model_name: XGBoost
|
|
Model func: XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000...
|
|
interaction_constraints=None, learning_rate=None,
|
|
max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan,
|
|
monotone_constraints=None, n_estimators=100,
|
|
n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None,
|
|
reg_lambda=None, scale_pos_weight=None,
|
|
subsample=None, tree_method=None,
|
|
use_label_encoder=False,
|
|
validate_parameters=None, verbosity=0))])
|
|
|
|
key: fit_time
|
|
value: [0.1125772 0.07779884 0.07666707 0.07768774 0.07903576 0.07717824
|
|
0.23021722 0.07503581 0.07449484 0.07649565]
|
|
|
|
mean value: 0.0957188367843628
|
|
|
|
key: score_time
|
|
value: [0.01106191 0.01120138 0.01102638 0.0110786 0.01113892 0.01110387
|
|
0.01111913 0.01120377 0.01116037 0.01105285]
|
|
|
|
mean value: 0.011114716529846191
|
|
|
|
key: test_mcc
|
|
value: [0.97222222 1. 0.94365079 0.97220047 0.9451949 0.97220047
|
|
0.97182532 0.97182532 0.8660254 0.97182532]
|
|
|
|
mean value: 0.9586970195178569
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.98591549 1. 0.97183099 0.98591549 0.97183099 0.98591549
|
|
0.98571429 0.98571429 0.92857143 0.98571429]
|
|
|
|
mean value: 0.9787122736418511
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.98591549 1. 0.97142857 0.98630137 0.97142857 0.98630137
|
|
0.98591549 0.98591549 0.93333333 0.98591549]
|
|
|
|
mean value: 0.979245518774749
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.97222222 1. 0.97142857 0.97297297 1. 0.97297297
|
|
0.97222222 0.97222222 0.875 0.97222222]
|
|
|
|
mean value: 0.9681263406263406
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 0.97142857 1. 0.94444444 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9915873015873016
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.98611111 1. 0.9718254 0.98571429 0.97222222 0.98571429
|
|
0.98571429 0.98571429 0.92857143 0.98571429]
|
|
|
|
mean value: 0.9787301587301587
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.97222222 1. 0.94444444 0.97297297 0.94444444 0.97297297
|
|
0.97222222 0.97222222 0.875 0.97222222]
|
|
|
|
mean value: 0.9598723723723723
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.32
|
|
|
|
Accuracy on Blind test: 0.88
|
|
|
|
Model_name: LDA
|
|
Model func: LinearDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LinearDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.05058384 0.08101773 0.0796783 0.08235264 0.08187366 0.04846835
|
|
0.08964729 0.08472157 0.08456278 0.09591699]
|
|
|
|
mean value: 0.07788231372833251
|
|
|
|
key: score_time
|
|
value: [0.01876688 0.01912355 0.01892328 0.01883245 0.01232243 0.01223588
|
|
0.01885676 0.02481174 0.01882434 0.01870203]
|
|
|
|
mean value: 0.01813993453979492
|
|
|
|
key: test_mcc
|
|
value: [0.91580648 0.9451949 0.94365079 0.85952381 0.91885703 0.88730159
|
|
0.8871639 0.97182532 0.73370909 0.77269114]
|
|
|
|
mean value: 0.8835724048794499
|
|
|
|
key: train_mcc
|
|
value: [0.95276075 0.94649961 0.94657527 0.9559054 0.94646099 0.9401617
|
|
0.94654556 0.94970432 0.95605048 0.946583 ]
|
|
|
|
mean value: 0.948724707469986
|
|
|
|
key: test_accuracy
|
|
value: [0.95774648 0.97183099 0.97183099 0.92957746 0.95774648 0.94366197
|
|
0.94285714 0.98571429 0.85714286 0.88571429]
|
|
|
|
mean value: 0.9403822937625754
|
|
|
|
key: train_accuracy
|
|
value: [0.97637795 0.97322835 0.97322835 0.97795276 0.97322835 0.97007874
|
|
0.97327044 0.97484277 0.97798742 0.97327044]
|
|
|
|
mean value: 0.9743465557371366
|
|
|
|
key: test_fscore
|
|
value: [0.95652174 0.97222222 0.97142857 0.92957746 0.95652174 0.94444444
|
|
0.94444444 0.98591549 0.87179487 0.88888889]
|
|
|
|
mean value: 0.9421759879230791
|
|
|
|
key: train_fscore
|
|
value: [0.97637795 0.97314376 0.97305864 0.97791798 0.97314376 0.9699842
|
|
0.97322835 0.97476341 0.9778481 0.97314376]
|
|
|
|
mean value: 0.9742609907407577
|
|
|
|
key: test_precision
|
|
value: [0.97058824 0.94594595 0.97142857 0.94285714 1. 0.94444444
|
|
0.91891892 0.97222222 0.79069767 0.86486486]
|
|
|
|
mean value: 0.9321968020394833
|
|
|
|
key: train_precision
|
|
value: [0.97791798 0.97777778 0.98083067 0.97791798 0.97468354 0.97151899
|
|
0.97476341 0.9778481 0.98407643 0.97777778]
|
|
|
|
mean value: 0.9775112661599659
|
|
|
|
key: test_recall
|
|
value: [0.94285714 1. 0.97142857 0.91666667 0.91666667 0.94444444
|
|
0.97142857 1. 0.97142857 0.91428571]
|
|
|
|
mean value: 0.954920634920635
|
|
|
|
key: train_recall
|
|
value: [0.97484277 0.96855346 0.96540881 0.97791798 0.97160883 0.96845426
|
|
0.97169811 0.97169811 0.97169811 0.96855346]
|
|
|
|
mean value: 0.9710433902743885
|
|
|
|
key: test_roc_auc
|
|
value: [0.95753968 0.97222222 0.9718254 0.9297619 0.95833333 0.94365079
|
|
0.94285714 0.98571429 0.85714286 0.88571429]
|
|
|
|
mean value: 0.9404761904761905
|
|
|
|
key: train_roc_auc
|
|
value: [0.97638037 0.97323572 0.97324068 0.9779527 0.9732258 0.97007619
|
|
0.97327044 0.97484277 0.97798742 0.97327044]
|
|
|
|
mean value: 0.9743482530801738
|
|
|
|
key: test_jcc
|
|
value: [0.91666667 0.94594595 0.94444444 0.86842105 0.91666667 0.89473684
|
|
0.89473684 0.97222222 0.77272727 0.8 ]
|
|
|
|
mean value: 0.8926567955515324
|
|
|
|
key: train_jcc
|
|
value: [0.95384615 0.94769231 0.94753086 0.95679012 0.94769231 0.94171779
|
|
0.94785276 0.95076923 0.95665635 0.94769231]
|
|
|
|
mean value: 0.9498240194243094
|
|
|
|
MCC on Blind test: 0.27
|
|
|
|
Accuracy on Blind test: 0.79
|
|
|
|
Model_name: Multinomial
|
|
Model func: MultinomialNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MultinomialNB())])
|
|
|
|
key: fit_time
|
|
value: [0.0148201 0.01372743 0.01160526 0.01040149 0.01132989 0.01087952
|
|
0.01023316 0.01087379 0.01035428 0.01052046]
|
|
|
|
mean value: 0.011474537849426269
|
|
|
|
key: score_time
|
|
value: [0.01198316 0.01021409 0.00898409 0.00960326 0.00976968 0.00921512
|
|
0.0089519 0.00918412 0.00963855 0.00898981]
|
|
|
|
mean value: 0.009653377532958984
|
|
|
|
key: test_mcc
|
|
value: [0.52142857 0.6666743 0.7468254 0.49323927 0.66190476 0.63557346
|
|
0.6882472 0.75055535 0.66701701 0.77651637]
|
|
|
|
mean value: 0.6607981691384757
|
|
|
|
key: train_mcc
|
|
value: [0.71043636 0.68807109 0.6523755 0.6837212 0.6414236 0.6798613
|
|
0.69156094 0.69081736 0.6570183 0.67892969]
|
|
|
|
mean value: 0.67742153493731
|
|
|
|
key: test_accuracy
|
|
value: [0.76056338 0.83098592 0.87323944 0.74647887 0.83098592 0.81690141
|
|
0.84285714 0.87142857 0.82857143 0.88571429]
|
|
|
|
mean value: 0.8287726358148894
|
|
|
|
key: train_accuracy
|
|
value: [0.85354331 0.84251969 0.82519685 0.84094488 0.82047244 0.83937008
|
|
0.84433962 0.84433962 0.82704403 0.83805031]
|
|
|
|
mean value: 0.8375820829000149
|
|
|
|
key: test_fscore
|
|
value: [0.76056338 0.83783784 0.87323944 0.75675676 0.83333333 0.82666667
|
|
0.84931507 0.88 0.84210526 0.89189189]
|
|
|
|
mean value: 0.835170963503894
|
|
|
|
key: train_fscore
|
|
value: [0.86056972 0.84984985 0.83207262 0.84627093 0.82352941 0.84355828
|
|
0.85112782 0.85022693 0.83483483 0.84511278]
|
|
|
|
mean value: 0.8437153169909086
|
|
|
|
key: test_precision
|
|
value: [0.75 0.79487179 0.86111111 0.73684211 0.83333333 0.79487179
|
|
0.81578947 0.825 0.7804878 0.84615385]
|
|
|
|
mean value: 0.8038461264167297
|
|
|
|
key: train_precision
|
|
value: [0.82234957 0.81321839 0.80174927 0.81764706 0.80851064 0.82089552
|
|
0.81556196 0.81924198 0.79885057 0.80979827]
|
|
|
|
mean value: 0.8127823239419141
|
|
|
|
key: test_recall
|
|
value: [0.77142857 0.88571429 0.88571429 0.77777778 0.83333333 0.86111111
|
|
0.88571429 0.94285714 0.91428571 0.94285714]
|
|
|
|
mean value: 0.8700793650793651
|
|
|
|
key: train_recall
|
|
value: [0.90251572 0.88993711 0.86477987 0.87697161 0.83911672 0.86750789
|
|
0.88993711 0.8836478 0.87421384 0.8836478 ]
|
|
|
|
mean value: 0.877227545979406
|
|
|
|
key: test_roc_auc
|
|
value: [0.76071429 0.83174603 0.8734127 0.74603175 0.83095238 0.81626984
|
|
0.84285714 0.87142857 0.82857143 0.88571429]
|
|
|
|
mean value: 0.8287698412698412
|
|
|
|
key: train_roc_auc
|
|
value: [0.85346606 0.84244489 0.82513442 0.84100153 0.82050176 0.83941432
|
|
0.84433962 0.84433962 0.82704403 0.83805031]
|
|
|
|
mean value: 0.8375736563299804
|
|
|
|
key: test_jcc
|
|
value: [0.61363636 0.72093023 0.775 0.60869565 0.71428571 0.70454545
|
|
0.73809524 0.78571429 0.72727273 0.80487805]
|
|
|
|
mean value: 0.7193053717062324
|
|
|
|
key: train_jcc
|
|
value: [0.75526316 0.73890339 0.71243523 0.73350923 0.7 0.72944297
|
|
0.7408377 0.73947368 0.71649485 0.73177083]
|
|
|
|
mean value: 0.7298131050201774
|
|
|
|
MCC on Blind test: 0.25
|
|
|
|
Accuracy on Blind test: 0.72
|
|
|
|
Model_name: Passive Aggresive
|
|
Model func: PassiveAggressiveClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
PassiveAggressiveClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.03253651 0.03122544 0.02751136 0.02687168 0.03280592 0.02729893
|
|
0.03124261 0.02259803 0.02992439 0.03293633]
|
|
|
|
mean value: 0.02949512004852295
|
|
|
|
key: score_time
|
|
value: [0.01159143 0.01200294 0.01204681 0.01171541 0.01250625 0.0117116
|
|
0.01202464 0.01204658 0.01194882 0.01211691]
|
|
|
|
mean value: 0.011971139907836914
|
|
|
|
key: test_mcc
|
|
value: [0.57247871 0.9451949 0.86205133 0.85952381 0.85952381 0.78640246
|
|
0.94285714 0.76870611 0.5923057 0.88571429]
|
|
|
|
mean value: 0.8074758264216354
|
|
|
|
key: train_mcc
|
|
value: [0.76004007 0.93117386 0.93442263 0.95598214 0.94725427 0.80290703
|
|
0.95912424 0.87000349 0.84789086 0.9749199 ]
|
|
|
|
mean value: 0.8983718495679939
|
|
|
|
key: test_accuracy
|
|
value: [0.76056338 0.97183099 0.92957746 0.92957746 0.92957746 0.88732394
|
|
0.97142857 0.87142857 0.77142857 0.94285714]
|
|
|
|
mean value: 0.8965593561368209
|
|
|
|
key: train_accuracy
|
|
value: [0.86771654 0.96535433 0.96692913 0.97795276 0.97322835 0.89448819
|
|
0.97955975 0.93081761 0.91823899 0.98742138]
|
|
|
|
mean value: 0.9461707027187639
|
|
|
|
key: test_fscore
|
|
value: [0.69090909 0.97222222 0.92537313 0.92957746 0.92957746 0.87878788
|
|
0.97142857 0.88607595 0.80952381 0.94285714]
|
|
|
|
mean value: 0.8936332729001627
|
|
|
|
key: train_fscore
|
|
value: [0.84892086 0.96485623 0.9664 0.97805643 0.97372488 0.88347826
|
|
0.97952756 0.93529412 0.9244186 0.9875 ]
|
|
|
|
mean value: 0.9442176945976866
|
|
|
|
key: test_precision
|
|
value: [0.95 0.94594595 0.96875 0.94285714 0.94285714 0.96666667
|
|
0.97142857 0.79545455 0.69387755 0.94285714]
|
|
|
|
mean value: 0.9120694709087566
|
|
|
|
key: train_precision
|
|
value: [0.99159664 0.98051948 0.98371336 0.97196262 0.95454545 0.98449612
|
|
0.98107256 0.87845304 0.85945946 0.98136646]
|
|
|
|
mean value: 0.9567185182588565
|
|
|
|
key: test_recall
|
|
value: [0.54285714 1. 0.88571429 0.91666667 0.91666667 0.80555556
|
|
0.97142857 1. 0.97142857 0.94285714]
|
|
|
|
mean value: 0.8953174603174603
|
|
|
|
key: train_recall
|
|
value: [0.74213836 0.94968553 0.94968553 0.98422713 0.99369085 0.80126183
|
|
0.97798742 1. 1. 0.99371069]
|
|
|
|
mean value: 0.9392387357895363
|
|
|
|
key: test_roc_auc
|
|
value: [0.75753968 0.97222222 0.92896825 0.9297619 0.9297619 0.88849206
|
|
0.97142857 0.87142857 0.77142857 0.94285714]
|
|
|
|
mean value: 0.8963888888888889
|
|
|
|
key: train_roc_auc
|
|
value: [0.86791461 0.96537904 0.96695633 0.97796262 0.97326052 0.89434161
|
|
0.97955975 0.93081761 0.91823899 0.98742138]
|
|
|
|
mean value: 0.9461852469099061
|
|
|
|
key: test_jcc
|
|
value: [0.52777778 0.94594595 0.86111111 0.86842105 0.86842105 0.78378378
|
|
0.94444444 0.79545455 0.68 0.89189189]
|
|
|
|
mean value: 0.8167251605672659
|
|
|
|
key: train_jcc
|
|
value: [0.7375 0.93209877 0.93498452 0.95705521 0.94879518 0.79127726
|
|
0.95987654 0.87845304 0.85945946 0.97530864]
|
|
|
|
mean value: 0.8974808622888412
|
|
|
|
MCC on Blind test: 0.28
|
|
|
|
Accuracy on Blind test: 0.8
|
|
|
|
Model_name: Stochastic GDescent
|
|
Model func: SGDClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SGDClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02204204 0.03669167 0.02813387 0.02228069 0.02436638 0.0228703
|
|
0.01936364 0.02347398 0.022367 0.02248216]
|
|
|
|
mean value: 0.024407172203063966
|
|
|
|
key: score_time
|
|
value: [0.01175761 0.0125258 0.01175833 0.01201391 0.01199889 0.0120368
|
|
0.01192093 0.01198363 0.01197934 0.0120008 ]
|
|
|
|
mean value: 0.011997604370117187
|
|
|
|
key: test_mcc
|
|
value: [0.3354102 0.84343471 0.91580648 0.88880092 0.91885703 0.66791601
|
|
0.84102145 0.97182532 0.860309 0.8871639 ]
|
|
|
|
mean value: 0.8130545019788025
|
|
|
|
key: train_mcc
|
|
value: [0.4537707 0.91827631 0.95279902 0.95917497 0.94646152 0.59603569
|
|
0.88969056 0.9528349 0.9659869 0.94968553]
|
|
|
|
mean value: 0.8584716114273491
|
|
|
|
key: test_accuracy
|
|
value: [0.6056338 0.91549296 0.95774648 0.94366197 0.95774648 0.81690141
|
|
0.91428571 0.98571429 0.92857143 0.94285714]
|
|
|
|
mean value: 0.896861167002012
|
|
|
|
key: train_accuracy
|
|
value: [0.67244094 0.95748031 0.97637795 0.97952756 0.97322835 0.76377953
|
|
0.9418239 0.97641509 0.9827044 0.97484277]
|
|
|
|
mean value: 0.9198620809191304
|
|
|
|
key: test_fscore
|
|
value: [0.33333333 0.92105263 0.95652174 0.94285714 0.95652174 0.78688525
|
|
0.92105263 0.98591549 0.93150685 0.94444444]
|
|
|
|
mean value: 0.8680091250228139
|
|
|
|
key: train_fscore
|
|
value: [0.51627907 0.95927602 0.97630332 0.97965571 0.97322835 0.69135802
|
|
0.94502229 0.97637795 0.98299845 0.97484277]
|
|
|
|
mean value: 0.8975341951318628
|
|
|
|
key: test_precision
|
|
value: [1. 0.85365854 0.97058824 0.97058824 1. 0.96
|
|
0.85365854 0.97222222 0.89473684 0.91891892]
|
|
|
|
mean value: 0.9394371527005372
|
|
|
|
key: train_precision
|
|
value: [0.99107143 0.92173913 0.98095238 0.97204969 0.97169811 0.99408284
|
|
0.89577465 0.97791798 0.96656535 0.97484277]
|
|
|
|
mean value: 0.9646694328643369
|
|
|
|
key: test_recall
|
|
value: [0.2 1. 0.94285714 0.91666667 0.91666667 0.66666667
|
|
1. 1. 0.97142857 0.97142857]
|
|
|
|
mean value: 0.8585714285714285
|
|
|
|
key: train_recall
|
|
value: [0.3490566 1. 0.97169811 0.9873817 0.97476341 0.52996845
|
|
1. 0.97484277 1. 0.97484277]
|
|
|
|
mean value: 0.8762553816241097
|
|
|
|
key: test_roc_auc
|
|
value: [0.6 0.91666667 0.95753968 0.94404762 0.95833333 0.81904762
|
|
0.91428571 0.98571429 0.92857143 0.94285714]
|
|
|
|
mean value: 0.8967063492063492
|
|
|
|
key: train_roc_auc
|
|
value: [0.67295101 0.95741325 0.97638533 0.97953991 0.97323076 0.7634119
|
|
0.9418239 0.97641509 0.9827044 0.97484277]
|
|
|
|
mean value: 0.919871833025812
|
|
|
|
key: test_jcc
|
|
value: [0.2 0.85365854 0.91666667 0.89189189 0.91666667 0.64864865
|
|
0.85365854 0.97222222 0.87179487 0.89473684]
|
|
|
|
mean value: 0.8019944883166963
|
|
|
|
key: train_jcc
|
|
value: [0.34796238 0.92173913 0.9537037 0.9601227 0.94785276 0.52830189
|
|
0.89577465 0.95384615 0.96656535 0.95092025]
|
|
|
|
mean value: 0.8426788960175103
|
|
|
|
MCC on Blind test: 0.27
|
|
|
|
Accuracy on Blind test: 0.81
|
|
|
|
Model_name: AdaBoost Classifier
|
|
Model func: AdaBoostClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', AdaBoostClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.22546458 0.19436455 0.1946454 0.19497442 0.19445992 0.19479394
|
|
0.19455838 0.19483304 0.19500494 0.19600224]
|
|
|
|
mean value: 0.19791014194488527
|
|
|
|
key: score_time
|
|
value: [0.01549411 0.01600933 0.01583385 0.01555824 0.01572871 0.01574898
|
|
0.01564622 0.01579309 0.01569223 0.01605964]
|
|
|
|
mean value: 0.015756440162658692
|
|
|
|
key: test_mcc
|
|
value: [0.97222222 1. 0.97222222 0.94511009 0.9451949 0.97220047
|
|
0.97182532 0.97182532 0.89155583 1. ]
|
|
|
|
mean value: 0.9642156357128903
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.98591549 1. 0.98591549 0.97183099 0.97183099 0.98591549
|
|
0.98571429 0.98571429 0.94285714 1. ]
|
|
|
|
mean value: 0.981569416498994
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.98591549 1. 0.98591549 0.97297297 0.97142857 0.98630137
|
|
0.98591549 0.98591549 0.94594595 1. ]
|
|
|
|
mean value: 0.982031083204149
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.97222222 1. 0.97222222 0.94736842 1. 0.97297297
|
|
0.97222222 0.97222222 0.8974359 1. ]
|
|
|
|
mean value: 0.9706666180350391
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 0.94444444 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9944444444444445
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.98611111 1. 0.98611111 0.97142857 0.97222222 0.98571429
|
|
0.98571429 0.98571429 0.94285714 1. ]
|
|
|
|
mean value: 0.9815873015873016
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.97222222 1. 0.97222222 0.94736842 0.94444444 0.97297297
|
|
0.97222222 0.97222222 0.8974359 1. ]
|
|
|
|
mean value: 0.9651110624794835
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.32
|
|
|
|
Accuracy on Blind test: 0.87
|
|
|
|
Model_name: Bagging Classifier
|
|
Model func: BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
BaggingClassifier(n_jobs=10, oob_score=True,
|
|
random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.07076693 0.08627796 0.09544015 0.065274 0.10156941 0.09778333
|
|
0.08509755 0.08592248 0.09170723 0.08407021]
|
|
|
|
mean value: 0.08639092445373535
|
|
|
|
key: score_time
|
|
value: [0.0237596 0.04130745 0.01855063 0.02521944 0.03415465 0.03322506
|
|
0.02572966 0.0419929 0.02918959 0.02997637]
|
|
|
|
mean value: 0.030310535430908205
|
|
|
|
key: test_mcc
|
|
value: [0.97222222 1. 0.94365079 0.97220047 1. 0.97220047
|
|
0.97182532 0.97182532 0.8660254 0.94440028]
|
|
|
|
mean value: 0.9614350263932501
|
|
|
|
key: train_mcc
|
|
value: [1. 0.99685531 0.99372043 1. 0.99055612 0.99372055
|
|
1. 0.99686027 0.99371069 1. ]
|
|
|
|
mean value: 0.9965423374157795
|
|
|
|
key: test_accuracy
|
|
value: [0.98591549 1. 0.97183099 0.98591549 1. 0.98591549
|
|
0.98571429 0.98571429 0.92857143 0.97142857]
|
|
|
|
mean value: 0.9801006036217303
|
|
|
|
key: train_accuracy
|
|
value: [1. 0.9984252 0.99685039 1. 0.99527559 0.99685039
|
|
1. 0.99842767 0.99685535 1. ]
|
|
|
|
mean value: 0.9982684593671074
|
|
|
|
key: test_fscore
|
|
value: [0.98591549 1. 0.97142857 0.98630137 1. 0.98630137
|
|
0.98591549 0.98591549 0.93333333 0.97222222]
|
|
|
|
mean value: 0.9807333345583393
|
|
|
|
key: train_fscore
|
|
value: [1. 0.99843014 0.9968652 1. 0.99527559 0.99685535
|
|
1. 0.99843014 0.99685535 1. ]
|
|
|
|
mean value: 0.9982711768711404
|
|
|
|
key: test_precision
|
|
value: [0.97222222 1. 0.97142857 0.97297297 1. 0.97297297
|
|
0.97222222 0.97222222 0.875 0.94594595]
|
|
|
|
mean value: 0.965498712998713
|
|
|
|
key: train_precision
|
|
value: [1. 0.9968652 0.99375 1. 0.99371069 0.99373041
|
|
1. 0.9968652 0.99685535 1. ]
|
|
|
|
mean value: 0.9971776852782871
|
|
|
|
key: test_recall
|
|
value: [1. 1. 0.97142857 1. 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9971428571428571
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 0.99684543 1.
|
|
1. 1. 0.99685535 1. ]
|
|
|
|
mean value: 0.9993700771779458
|
|
|
|
key: test_roc_auc
|
|
value: [0.98611111 1. 0.9718254 0.98571429 1. 0.98571429
|
|
0.98571429 0.98571429 0.92857143 0.97142857]
|
|
|
|
mean value: 0.9800793650793651
|
|
|
|
key: train_roc_auc
|
|
value: [1. 0.99842271 0.99684543 1. 0.99527806 0.99685535
|
|
1. 0.99842767 0.99685535 1. ]
|
|
|
|
mean value: 0.998268456242684
|
|
|
|
key: test_jcc
|
|
value: [0.97222222 1. 0.94444444 0.97297297 1. 0.97297297
|
|
0.97222222 0.97222222 0.875 0.94594595]
|
|
|
|
mean value: 0.9628003003003003
|
|
|
|
key: train_jcc
|
|
value: [1. 0.9968652 0.99375 1. 0.99059561 0.99373041
|
|
1. 0.9968652 0.99373041 1. ]
|
|
|
|
mean value: 0.9965536833855799
|
|
|
|
MCC on Blind test: 0.28
|
|
|
|
Accuracy on Blind test: 0.85
|
|
|
|
Model_name: Gaussian Process
|
|
Model func: GaussianProcessClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianProcessClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.26558423 0.2791121 0.19435525 0.27373028 0.37693286 0.20460057
|
|
0.22483277 0.25122952 0.26332879 0.26462841]
|
|
|
|
mean value: 0.2598334789276123
|
|
|
|
key: score_time
|
|
value: [0.02960515 0.01895237 0.03322577 0.03162241 0.02962518 0.01701069
|
|
0.02908564 0.02873373 0.02856064 0.0286057 ]
|
|
|
|
mean value: 0.027502727508544923
|
|
|
|
key: test_mcc
|
|
value: [0.72937021 0.9451949 0.91587302 0.94511009 0.80543187 0.7488124
|
|
0.89155583 0.89155583 0.80829038 0.80295507]
|
|
|
|
mean value: 0.8484149579603325
|
|
|
|
key: train_mcc
|
|
value: [0.97165815 0.98429564 0.98425673 0.9842961 0.97795766 0.97177468
|
|
0.98749951 0.97799226 0.98744091 0.97822445]
|
|
|
|
mean value: 0.980539609426249
|
|
|
|
key: test_accuracy
|
|
value: [0.85915493 0.97183099 0.95774648 0.97183099 0.90140845 0.87323944
|
|
0.94285714 0.94285714 0.9 0.9 ]
|
|
|
|
mean value: 0.922092555331992
|
|
|
|
key: train_accuracy
|
|
value: [0.98582677 0.99212598 0.99212598 0.99212598 0.98897638 0.98582677
|
|
0.99371069 0.98899371 0.99371069 0.98899371]
|
|
|
|
mean value: 0.9902416679047195
|
|
|
|
key: test_fscore
|
|
value: [0.86842105 0.97222222 0.95774648 0.97297297 0.90666667 0.88
|
|
0.94594595 0.94594595 0.90666667 0.90410959]
|
|
|
|
mean value: 0.9260697540966335
|
|
|
|
key: train_fscore
|
|
value: [0.98587127 0.99217527 0.99215071 0.99215071 0.98897638 0.98591549
|
|
0.99375 0.98901099 0.99373041 0.98911353]
|
|
|
|
mean value: 0.990284475609541
|
|
|
|
key: test_precision
|
|
value: [0.80487805 0.94594595 0.94444444 0.94736842 0.87179487 0.84615385
|
|
0.8974359 0.8974359 0.85 0.86842105]
|
|
|
|
mean value: 0.8873878425675602
|
|
|
|
key: train_precision
|
|
value: [0.98432602 0.98753894 0.99059561 0.9875 0.98742138 0.97826087
|
|
0.98757764 0.98746082 0.990625 0.97846154]
|
|
|
|
mean value: 0.9859767817377142
|
|
|
|
key: test_recall
|
|
value: [0.94285714 1. 0.97142857 1. 0.94444444 0.91666667
|
|
1. 1. 0.97142857 0.94285714]
|
|
|
|
mean value: 0.9689682539682539
|
|
|
|
key: train_recall
|
|
value: [0.98742138 0.99685535 0.99371069 0.99684543 0.99053628 0.99369085
|
|
1. 0.99056604 0.99685535 1. ]
|
|
|
|
mean value: 0.9946481360236494
|
|
|
|
key: test_roc_auc
|
|
value: [0.86031746 0.97222222 0.95793651 0.97142857 0.90079365 0.87261905
|
|
0.94285714 0.94285714 0.9 0.9 ]
|
|
|
|
mean value: 0.9221031746031746
|
|
|
|
key: train_roc_auc
|
|
value: [0.98582426 0.99211852 0.99212348 0.9921334 0.98897883 0.98583914
|
|
0.99371069 0.98899371 0.99371069 0.98899371]
|
|
|
|
mean value: 0.9902426442870464
|
|
|
|
key: test_jcc
|
|
value: [0.76744186 0.94594595 0.91891892 0.94736842 0.82926829 0.78571429
|
|
0.8974359 0.8974359 0.82926829 0.825 ]
|
|
|
|
mean value: 0.8643797812334547
|
|
|
|
key: train_jcc
|
|
value: [0.97213622 0.98447205 0.98442368 0.98442368 0.97819315 0.97222222
|
|
0.98757764 0.97826087 0.98753894 0.97846154]
|
|
|
|
mean value: 0.9807709981852525
|
|
|
|
MCC on Blind test: 0.22
|
|
|
|
Accuracy on Blind test: 0.82
|
|
|
|
Model_name: Gradient Boosting
|
|
Model func: GradientBoostingClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GradientBoostingClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.80205464 0.78985548 0.78449249 0.78770638 0.78568339 0.78444052
|
|
0.79058051 0.78965425 0.78158712 0.78455234]
|
|
|
|
mean value: 0.7880607128143311
|
|
|
|
key: score_time
|
|
value: [0.00961947 0.00938654 0.00942087 0.00940108 0.0095551 0.00935578
|
|
0.00921917 0.0093565 0.00953579 0.00915623]
|
|
|
|
mean value: 0.009400653839111327
|
|
|
|
key: test_mcc
|
|
value: [0.97222222 1. 0.94365079 0.97220047 0.9451949 0.91587302
|
|
0.97182532 0.97182532 0.8660254 0.94440028]
|
|
|
|
mean value: 0.9503217711318614
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.98591549 1. 0.97183099 0.98591549 0.97183099 0.95774648
|
|
0.98571429 0.98571429 0.92857143 0.97142857]
|
|
|
|
mean value: 0.974466800804829
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.98591549 1. 0.97142857 0.98630137 0.97142857 0.95774648
|
|
0.98591549 0.98591549 0.93333333 0.97222222]
|
|
|
|
mean value: 0.9750207026022191
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.97222222 1. 0.97142857 0.97297297 1. 0.97142857
|
|
0.97222222 0.97222222 0.875 0.94594595]
|
|
|
|
mean value: 0.9653442728442728
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 0.97142857 1. 0.94444444 0.94444444
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.986031746031746
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.98611111 1. 0.9718254 0.98571429 0.97222222 0.95793651
|
|
0.98571429 0.98571429 0.92857143 0.97142857]
|
|
|
|
mean value: 0.9745238095238095
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.97222222 1. 0.94444444 0.97297297 0.94444444 0.91891892
|
|
0.97222222 0.97222222 0.875 0.94594595]
|
|
|
|
mean value: 0.9518393393393394
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.3
|
|
|
|
Accuracy on Blind test: 0.86
|
|
|
|
Model_name: QDA
|
|
Model func: QuadraticDiscriminantAnalysis()
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', QuadraticDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.0370574 0.0337615 0.03360891 0.03357196 0.0335803 0.032866
|
|
0.0333879 0.03294683 0.03329921 0.03359008]
|
|
|
|
mean value: 0.03376700878143311
|
|
|
|
key: score_time
|
|
value: [0.01236892 0.01744151 0.01777196 0.01530886 0.01502085 0.01501894
|
|
0.01506972 0.01521397 0.01261902 0.0150373 ]
|
|
|
|
mean value: 0.015087103843688965
|
|
|
|
key: test_mcc
|
|
value: [0.9451949 0.91580648 0.94365079 1. 0.9451949 0.9451949
|
|
1. 0.94440028 0.91465912 0.94440028]
|
|
|
|
mean value: 0.9498501653017968
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.97183099 0.95774648 0.97183099 1. 0.97183099 0.97183099
|
|
1. 0.97142857 0.95714286 0.97142857]
|
|
|
|
mean value: 0.9745070422535211
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.97222222 0.95652174 0.97142857 1. 0.97142857 0.97142857
|
|
1. 0.97058824 0.95774648 0.97058824]
|
|
|
|
mean value: 0.9741952625099846
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.94594595 0.97058824 0.97142857 1. 1. 1.
|
|
1. 1. 0.94444444 1. ]
|
|
|
|
mean value: 0.983240719711308
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 0.94285714 0.97142857 1. 0.94444444 0.94444444
|
|
1. 0.94285714 0.97142857 0.94285714]
|
|
|
|
mean value: 0.966031746031746
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.97222222 0.95753968 0.9718254 1. 0.97222222 0.97222222
|
|
1. 0.97142857 0.95714286 0.97142857]
|
|
|
|
mean value: 0.9746031746031746
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.94594595 0.91666667 0.94444444 1. 0.94444444 0.94444444
|
|
1. 0.94285714 0.91891892 0.94285714]
|
|
|
|
mean value: 0.950057915057915
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.13
|
|
|
|
Accuracy on Blind test: 0.94
|
|
|
|
Model_name: Ridge Classifier
|
|
Model func: RidgeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02943301 0.02385998 0.05443525 0.05325747 0.03985357 0.0398984
|
|
0.04389644 0.0401125 0.04032493 0.03677821]
|
|
|
|
mean value: 0.040184974670410156
|
|
|
|
key: score_time
|
|
value: [0.02438474 0.0154922 0.01888061 0.02110052 0.01884937 0.01861572
|
|
0.0188458 0.01861072 0.01863813 0.01865315]
|
|
|
|
mean value: 0.019207096099853514
|
|
|
|
key: test_mcc
|
|
value: [0.88862624 0.91587302 0.94365079 0.88880092 0.88880092 0.97220047
|
|
0.94285714 0.97182532 0.8340361 0.91465912]
|
|
|
|
mean value: 0.9161330021505507
|
|
|
|
key: train_mcc
|
|
value: [0.93708333 0.92767571 0.94020019 0.9401617 0.94330695 0.92457213
|
|
0.94029342 0.92789779 0.94347087 0.93089126]
|
|
|
|
mean value: 0.9355553339350321
|
|
|
|
key: test_accuracy
|
|
value: [0.94366197 0.95774648 0.97183099 0.94366197 0.94366197 0.98591549
|
|
0.97142857 0.98571429 0.91428571 0.95714286]
|
|
|
|
mean value: 0.9575050301810866
|
|
|
|
key: train_accuracy
|
|
value: [0.96850394 0.96377953 0.97007874 0.97007874 0.97165354 0.96220472
|
|
0.97012579 0.96383648 0.97169811 0.96540881]
|
|
|
|
mean value: 0.9677368394988363
|
|
|
|
key: test_fscore
|
|
value: [0.94117647 0.95774648 0.97142857 0.94285714 0.94285714 0.98630137
|
|
0.97142857 0.98591549 0.91891892 0.95652174]
|
|
|
|
mean value: 0.9575151898903017
|
|
|
|
key: train_fscore
|
|
value: [0.96835443 0.96354992 0.9699842 0.9699842 0.97160883 0.96178344
|
|
0.9699842 0.96343402 0.97151899 0.96518987]
|
|
|
|
mean value: 0.9675392113090577
|
|
|
|
key: test_precision
|
|
value: [0.96969697 0.94444444 0.97142857 0.97058824 0.97058824 0.97297297
|
|
0.97142857 0.97222222 0.87179487 0.97058824]
|
|
|
|
mean value: 0.9585753329870977
|
|
|
|
key: train_precision
|
|
value: [0.97452229 0.97124601 0.97460317 0.97151899 0.97160883 0.97106109
|
|
0.97460317 0.97427653 0.97770701 0.97133758]
|
|
|
|
mean value: 0.9732484675305139
|
|
|
|
key: test_recall
|
|
value: [0.91428571 0.97142857 0.97142857 0.91666667 0.91666667 1.
|
|
0.97142857 1. 0.97142857 0.94285714]
|
|
|
|
mean value: 0.9576190476190476
|
|
|
|
key: train_recall
|
|
value: [0.96226415 0.95597484 0.96540881 0.96845426 0.97160883 0.95268139
|
|
0.96540881 0.95283019 0.96540881 0.9591195 ]
|
|
|
|
mean value: 0.9619159573834891
|
|
|
|
key: test_roc_auc
|
|
value: [0.94325397 0.95793651 0.9718254 0.94404762 0.94404762 0.98571429
|
|
0.97142857 0.98571429 0.91428571 0.95714286]
|
|
|
|
mean value: 0.9575396825396825
|
|
|
|
key: train_roc_auc
|
|
value: [0.96851378 0.96379184 0.97008611 0.97007619 0.97165347 0.96218975
|
|
0.97012579 0.96383648 0.97169811 0.96540881]
|
|
|
|
mean value: 0.9677380314663809
|
|
|
|
key: test_jcc
|
|
value: [0.88888889 0.91891892 0.94444444 0.89189189 0.89189189 0.97297297
|
|
0.94444444 0.97222222 0.85 0.91666667]
|
|
|
|
mean value: 0.9192342342342342
|
|
|
|
key: train_jcc
|
|
value: [0.93865031 0.92966361 0.94171779 0.94171779 0.94478528 0.92638037
|
|
0.94171779 0.92944785 0.94461538 0.93272171]
|
|
|
|
mean value: 0.9371417883630413
|
|
|
|
MCC on Blind test: 0.25
|
|
|
|
Accuracy on Blind test: 0.81
|
|
|
|
Model_name: Ridge ClassifierCV
|
|
Model func: RidgeClassifierCV(cv=10)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifierCV(cv=10))])
|
|
|
|
key: fit_time
|
|
value: [0.38817883 0.31455183 0.31654954 0.37353539 0.45175648 0.19992256
|
|
0.2912755 0.32329702 0.31166291 0.31874299]
|
|
|
|
mean value: 0.3289473056793213
|
|
|
|
key: score_time
|
|
value: [0.01890993 0.01887488 0.02181625 0.01895404 0.01883698 0.01887655
|
|
0.02320552 0.01887083 0.01891518 0.01881385]
|
|
|
|
mean value: 0.019607400894165038
|
|
|
|
key: test_mcc
|
|
value: [0.88862624 0.91587302 0.94365079 0.88880092 0.88880092 0.97220047
|
|
0.94285714 0.97182532 0.8340361 0.91465912]
|
|
|
|
mean value: 0.9161330021505507
|
|
|
|
key: train_mcc
|
|
value: [0.93708333 0.92767571 0.94020019 0.9401617 0.94330695 0.92457213
|
|
0.94029342 0.92789779 0.94347087 0.93089126]
|
|
|
|
mean value: 0.9355553339350321
|
|
|
|
key: test_accuracy
|
|
value: [0.94366197 0.95774648 0.97183099 0.94366197 0.94366197 0.98591549
|
|
0.97142857 0.98571429 0.91428571 0.95714286]
|
|
|
|
mean value: 0.9575050301810866
|
|
|
|
key: train_accuracy
|
|
value: [0.96850394 0.96377953 0.97007874 0.97007874 0.97165354 0.96220472
|
|
0.97012579 0.96383648 0.97169811 0.96540881]
|
|
|
|
mean value: 0.9677368394988363
|
|
|
|
key: test_fscore
|
|
value: [0.94117647 0.95774648 0.97142857 0.94285714 0.94285714 0.98630137
|
|
0.97142857 0.98591549 0.91891892 0.95652174]
|
|
|
|
mean value: 0.9575151898903017
|
|
|
|
key: train_fscore
|
|
value: /home/tanu/git/LSHTM_analysis/scripts/ml/./embb_orig.py:195: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
rouC_CT.sort_values(by = ['test_mcc'], ascending = False, inplace = True)
|
|
/home/tanu/git/LSHTM_analysis/scripts/ml/./embb_orig.py:198: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
rouC_BT.sort_values(by = ['bts_mcc'], ascending = False, inplace = True)
|
|
[0.96835443 0.96354992 0.9699842 0.9699842 0.97160883 0.96178344
|
|
0.9699842 0.96343402 0.97151899 0.96518987]
|
|
|
|
mean value: 0.9675392113090577
|
|
|
|
key: test_precision
|
|
value: [0.96969697 0.94444444 0.97142857 0.97058824 0.97058824 0.97297297
|
|
0.97142857 0.97222222 0.87179487 0.97058824]
|
|
|
|
mean value: 0.9585753329870977
|
|
|
|
key: train_precision
|
|
value: [0.97452229 0.97124601 0.97460317 0.97151899 0.97160883 0.97106109
|
|
0.97460317 0.97427653 0.97770701 0.97133758]
|
|
|
|
mean value: 0.9732484675305139
|
|
|
|
key: test_recall
|
|
value: [0.91428571 0.97142857 0.97142857 0.91666667 0.91666667 1.
|
|
0.97142857 1. 0.97142857 0.94285714]
|
|
|
|
mean value: 0.9576190476190476
|
|
|
|
key: train_recall
|
|
value: [0.96226415 0.95597484 0.96540881 0.96845426 0.97160883 0.95268139
|
|
0.96540881 0.95283019 0.96540881 0.9591195 ]
|
|
|
|
mean value: 0.9619159573834891
|
|
|
|
key: test_roc_auc
|
|
value: [0.94325397 0.95793651 0.9718254 0.94404762 0.94404762 0.98571429
|
|
0.97142857 0.98571429 0.91428571 0.95714286]
|
|
|
|
mean value: 0.9575396825396825
|
|
|
|
key: train_roc_auc
|
|
value: [0.96851378 0.96379184 0.97008611 0.97007619 0.97165347 0.96218975
|
|
0.97012579 0.96383648 0.97169811 0.96540881]
|
|
|
|
mean value: 0.9677380314663809
|
|
|
|
key: test_jcc
|
|
value: [0.88888889 0.91891892 0.94444444 0.89189189 0.89189189 0.97297297
|
|
0.94444444 0.97222222 0.85 0.91666667]
|
|
|
|
mean value: 0.9192342342342342
|
|
|
|
key: train_jcc
|
|
value: [0.93865031 0.92966361 0.94171779 0.94171779 0.94478528 0.92638037
|
|
0.94171779 0.92944785 0.94461538 0.93272171]
|
|
|
|
mean value: 0.9371417883630413
|
|
|
|
MCC on Blind test: 0.25
|
|
|
|
Accuracy on Blind test: 0.81
|