19550 lines
958 KiB
Text
19550 lines
958 KiB
Text
/home/tanu/git/LSHTM_analysis/scripts/ml/ml_data_cd_sl.py:548: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
mask_check.sort_values(by = ['ligand_distance'], ascending = True, inplace = True)
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/xgboost/compat.py:36: FutureWarning: pandas.Int64Index is deprecated and will be removed from pandas in a future version. Use pandas.Index with the appropriate dtype instead.
|
|
from pandas import MultiIndex, Int64Index
|
|
1.22.4
|
|
1.4.1
|
|
|
|
aaindex_df contains non-numerical data
|
|
|
|
Total no. of non-numerial columns: 2
|
|
|
|
Selecting numerical data only
|
|
|
|
PASS: successfully selected numerical columns only for aaindex_df
|
|
|
|
Now checking for NA in the remaining aaindex_cols
|
|
|
|
Counting aaindex_df cols with NA
|
|
ncols with NA: 4 columns
|
|
Dropping these...
|
|
Original ncols: 127
|
|
|
|
Revised df ncols: 123
|
|
|
|
Checking NA in revised df...
|
|
|
|
PASS: cols with NA successfully dropped from aaindex_df
|
|
Proceeding with combining aa_df with other features_df
|
|
|
|
PASS: ncols match
|
|
Expected ncols: 123
|
|
Got: 123
|
|
|
|
Total no. of columns in clean aa_df: 123
|
|
|
|
Proceeding to merge, expected nrows in merged_df: 424
|
|
|
|
PASS: my_features_df and aa_df successfully combined
|
|
nrows: 424
|
|
ncols: 265
|
|
count of NULL values before imputation
|
|
|
|
or_mychisq 102
|
|
log10_or_mychisq 102
|
|
dtype: int64
|
|
count of NULL values AFTER imputation
|
|
|
|
mutationinformation 0
|
|
or_rawI 0
|
|
logorI 0
|
|
dtype: int64
|
|
|
|
PASS: OR values imputed, data ready for ML
|
|
|
|
Total no. of features for aaindex: 123
|
|
|
|
No. of numerical features: 166
|
|
No. of categorical features: 7
|
|
|
|
PASS: x_features has no target variable
|
|
|
|
No. of columns for x_features: 173
|
|
|
|
-------------------------------------------------------------
|
|
Successfully split data with stratification according to scaling law [COMPLETE data]: 1/sqrt(x_ncols)
|
|
Input features data size: (424, 173)
|
|
Train data size: (391, 173)
|
|
Test data size: (33, 173)
|
|
y_train numbers: Counter({1: 215, 0: 176})
|
|
y_train ratio: 0.8186046511627907
|
|
|
|
y_test_numbers: Counter({1: 18, 0: 15})
|
|
y_test ratio: 0.8333333333333334
|
|
-------------------------------------------------------------
|
|
|
|
index: 0
|
|
ind: 1
|
|
|
|
Mask count check: True
|
|
Original Data
|
|
Counter({1: 215, 0: 176}) Data dim: (391, 173)
|
|
|
|
Simple Random OverSampling
|
|
Counter({1: 215, 0: 215})
|
|
(430, 173)
|
|
|
|
Simple Random UnderSampling
|
|
Counter({0: 176, 1: 176})
|
|
(352, 173)
|
|
|
|
Simple Combined Over and UnderSampling
|
|
Counter({0: 215, 1: 215})
|
|
(430, 173)
|
|
|
|
SMOTE_NC OverSampling
|
|
Counter({1: 215, 0: 215})
|
|
(430, 173)
|
|
|
|
#####################################################################
|
|
|
|
Running ML analysis [COMPLETE DATA]: 70/30 split
|
|
Gene name: pncA
|
|
Drug name: pyrazinamide
|
|
|
|
Output directory: /home/tanu/git/Data/pyrazinamide/output/ml/tts_cd_sl/
|
|
|
|
Sanity checks:
|
|
Total input features: 173
|
|
|
|
Training data size: (391, 173)
|
|
Test data size: (33, 173)
|
|
|
|
Target feature numbers (training data): Counter({1: 215, 0: 176})
|
|
Target features ratio (training data: 0.8186046511627907
|
|
|
|
Target feature numbers (test data): Counter({1: 18, 0: 15})
|
|
Target features ratio (test data): 0.8333333333333334
|
|
|
|
#####################################################################
|
|
|
|
|
|
================================================================
|
|
|
|
Strucutral features (n): 34
|
|
These are:
|
|
Common stablity features: ['ligand_distance', 'ligand_affinity_change', 'duet_stability_change', 'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts']
|
|
FoldX columns: ['electro_rr', 'electro_mm', 'electro_sm', 'electro_ss', 'disulfide_rr', 'disulfide_mm', 'disulfide_sm', 'disulfide_ss', 'hbonds_rr', 'hbonds_mm', 'hbonds_sm', 'hbonds_ss', 'partcov_rr', 'partcov_mm', 'partcov_sm', 'partcov_ss', 'vdwclashes_rr', 'vdwclashes_mm', 'vdwclashes_sm', 'vdwclashes_ss', 'volumetric_rr', 'volumetric_mm', 'volumetric_ss']
|
|
Other struc columns: ['rsa', 'kd_values', 'rd_values']
|
|
================================================================
|
|
|
|
AAindex features (n): 123
|
|
These are:
|
|
['ALTS910101', 'AZAE970101', 'AZAE970102', 'BASU010101', 'BENS940101', 'BENS940102', 'BENS940103', 'BENS940104', 'BETM990101', 'BLAJ010101', 'BONM030101', 'BONM030102', 'BONM030103', 'BONM030104', 'BONM030105', 'BONM030106', 'BRYS930101', 'CROG050101', 'CSEM940101', 'DAYM780301', 'DAYM780302', 'DOSZ010101', 'DOSZ010102', 'DOSZ010103', 'DOSZ010104', 'FEND850101', 'FITW660101', 'GEOD900101', 'GIAG010101', 'GONG920101', 'GRAR740104', 'HENS920101', 'HENS920102', 'HENS920103', 'HENS920104', 'JOHM930101', 'JOND920103', 'JOND940101', 'KANM000101', 'KAPO950101', 'KESO980101', 'KESO980102', 'KOLA920101', 'KOLA930101', 'KOSJ950100_RSA_SST', 'KOSJ950100_SST', 'KOSJ950110_RSA', 'KOSJ950115', 'LEVJ860101', 'LINK010101', 'LIWA970101', 'LUTR910101', 'LUTR910102', 'LUTR910103', 'LUTR910104', 'LUTR910105', 'LUTR910106', 'LUTR910107', 'LUTR910108', 'LUTR910109', 'MCLA710101', 'MCLA720101', 'MEHP950102', 'MICC010101', 'MIRL960101', 'MIYS850102', 'MIYS850103', 'MIYS930101', 'MIYS960101', 'MIYS960102', 'MIYS960103', 'MIYS990106', 'MIYS990107', 'MIYT790101', 'MOHR870101', 'MOOG990101', 'MUET010101', 'MUET020101', 'MUET020102', 'NAOD960101', 'NGPC000101', 'NIEK910101', 'NIEK910102', 'OGAK980101', 'OVEJ920100_RSA', 'OVEJ920101', 'OVEJ920102', 'OVEJ920103', 'PRLA000101', 'PRLA000102', 'QUIB020101', 'QU_C930101', 'QU_C930102', 'QU_C930103', 'RIER950101', 'RISJ880101', 'RUSR970101', 'RUSR970102', 'RUSR970103', 'SIMK990101', 'SIMK990102', 'SIMK990103', 'SIMK990104', 'SIMK990105', 'SKOJ000101', 'SKOJ000102', 'SKOJ970101', 'TANS760101', 'TANS760102', 'THOP960101', 'TOBD000101', 'TOBD000102', 'TUDE900101', 'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101', 'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106']
|
|
================================================================
|
|
|
|
Evolutionary features (n): 3
|
|
These are:
|
|
['consurf_score', 'snap2_score', 'provean_score']
|
|
================================================================
|
|
|
|
Genomic features (n): 6
|
|
These are:
|
|
['maf', 'logorI']
|
|
['lineage_proportion', 'dist_lineage_proportion', 'lineage_count_all', 'lineage_count_unique']
|
|
================================================================
|
|
|
|
Categorical features (n): 7
|
|
These are:
|
|
['ss_class', 'aa_prop_change', 'electrostatics_change', 'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site']
|
|
================================================================
|
|
|
|
|
|
Pass: No. of features match
|
|
|
|
#####################################################################
|
|
|
|
|
|
Model_name: Logistic Regression
|
|
Model func: LogisticRegression(random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegression(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.06880522 0.06128597 0.03583455 0.03662205 0.03685975 0.03469419
|
|
0.07428098 0.07097745 0.07788467 0.0719893 ]
|
|
|
|
mean value: 0.056923413276672365
|
|
|
|
key: score_time
|
|
value: [0.02212453 0.0147028 0.01471114 0.01479864 0.01835752 0.01722169
|
|
0.01488853 0.02007484 0.01981497 0.01230764]
|
|
|
|
mean value: 0.016900229454040527
|
|
|
|
key: test_mcc
|
|
value: [0.38978972 0.31232635 0.37433155 0.74203177 0.63570849 0.28496141
|
|
0.59384599 0.54870326 0.58730159 0.59366961]
|
|
|
|
mean value: 0.5062669750285254
|
|
|
|
key: train_mcc
|
|
value: [0.72322874 0.67828767 0.71860784 0.68388464 0.72998334 0.71807179
|
|
0.70142242 0.72384814 0.72961234 0.68920955]
|
|
|
|
mean value: 0.7096156476090553
|
|
|
|
key: test_accuracy
|
|
value: [0.7 0.66666667 0.69230769 0.87179487 0.82051282 0.64102564
|
|
0.79487179 0.74358974 0.79487179 0.79487179]
|
|
|
|
mean value: 0.752051282051282
|
|
|
|
key: train_accuracy
|
|
value: [0.86324786 0.84090909 0.86079545 0.84375 0.86647727 0.86079545
|
|
0.85227273 0.86363636 0.86647727 0.84659091]
|
|
|
|
mean value: 0.8564952408702409
|
|
|
|
key: test_fscore
|
|
value: [0.73913043 0.72340426 0.72727273 0.88372093 0.85106383 0.65
|
|
0.82608696 0.80769231 0.80952381 0.8 ]
|
|
|
|
mean value: 0.7817895251132134
|
|
|
|
key: train_fscore
|
|
value: [0.87755102 0.85641026 0.87403599 0.86005089 0.88040712 0.87594937
|
|
0.86597938 0.87817259 0.88101266 0.86363636]
|
|
|
|
mean value: 0.8713205641031424
|
|
|
|
key: test_precision
|
|
value: [0.70833333 0.68 0.72727273 0.9047619 0.8 0.68421053
|
|
0.76 0.67741935 0.80952381 0.84210526]
|
|
|
|
mean value: 0.7593626919204168
|
|
|
|
key: train_precision
|
|
value: [0.86432161 0.84771574 0.86734694 0.845 0.865 0.86069652
|
|
0.86597938 0.865 0.86567164 0.84653465]
|
|
|
|
mean value: 0.8593266476968946
|
|
|
|
key: test_recall
|
|
value: [0.77272727 0.77272727 0.72727273 0.86363636 0.90909091 0.61904762
|
|
0.9047619 1. 0.80952381 0.76190476]
|
|
|
|
mean value: 0.814069264069264
|
|
|
|
key: train_recall
|
|
value: [0.89119171 0.86528497 0.88082902 0.87564767 0.89637306 0.89175258
|
|
0.86597938 0.89175258 0.89690722 0.8814433 ]
|
|
|
|
mean value: 0.8837161476416858
|
|
|
|
key: test_roc_auc
|
|
value: [0.69191919 0.65106952 0.68716578 0.87299465 0.80748663 0.64285714
|
|
0.78571429 0.72222222 0.79365079 0.79761905]
|
|
|
|
mean value: 0.7452699261522792
|
|
|
|
key: train_roc_auc
|
|
value: [0.86015282 0.83830286 0.8586535 0.84033956 0.86328087 0.85726869
|
|
0.85071121 0.86043325 0.86301057 0.84262038]
|
|
|
|
mean value: 0.8534773716474491
|
|
|
|
key: test_jcc
|
|
value: [0.5862069 0.56666667 0.57142857 0.79166667 0.74074074 0.48148148
|
|
0.7037037 0.67741935 0.68 0.66666667]
|
|
|
|
mean value: 0.6465980748744931
|
|
|
|
key: train_jcc
|
|
value: [0.78181818 0.74887892 0.77625571 0.75446429 0.78636364 0.77927928
|
|
0.76363636 0.78280543 0.78733032 0.76 ]
|
|
|
|
mean value: 0.7720832124947455
|
|
|
|
MCC on Blind test: 0.45
|
|
|
|
Accuracy on Blind test: 0.73
|
|
|
|
Model_name: Logistic RegressionCV
|
|
Model func: LogisticRegressionCV(random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegressionCV(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.88919163 1.10667205 1.70803356 1.91856122 1.52863455 1.64006972
|
|
1.92494702 1.51954269 1.73066807 1.40030789]
|
|
|
|
mean value: 1.5366628408432006
|
|
|
|
key: score_time
|
|
value: [0.01226068 0.02890587 0.02300549 0.02526402 0.02500796 0.01665282
|
|
0.02039099 0.01842761 0.02366877 0.01797628]
|
|
|
|
mean value: 0.021156048774719237
|
|
|
|
key: test_mcc
|
|
value: [0.38978972 0.42228828 0.42319443 0.5828877 0.63344389 0.28496141
|
|
0.64246755 0.51647727 0.53458203 0.64116318]
|
|
|
|
mean value: 0.5071255464360515
|
|
|
|
key: train_mcc
|
|
value: [0.67698193 0.66080987 0.6493094 0.71845866 0.77038423 0.7584744
|
|
0.75860776 0.78182499 0.66611594 0.72386034]
|
|
|
|
mean value: 0.7164827518671235
|
|
|
|
key: test_accuracy
|
|
value: [0.7 0.71794872 0.71794872 0.79487179 0.82051282 0.64102564
|
|
0.82051282 0.74358974 0.76923077 0.82051282]
|
|
|
|
mean value: 0.7546153846153846
|
|
|
|
key: train_accuracy
|
|
value: [0.84045584 0.83238636 0.82670455 0.86079545 0.88636364 0.88068182
|
|
0.88068182 0.89204545 0.83522727 0.86363636]
|
|
|
|
mean value: 0.8598978567728568
|
|
|
|
key: test_fscore
|
|
value: [0.73913043 0.7755102 0.75555556 0.81818182 0.84444444 0.65
|
|
0.84444444 0.8 0.79069767 0.82926829]
|
|
|
|
mean value: 0.7847232868592036
|
|
|
|
key: train_fscore
|
|
value: [0.85858586 0.85063291 0.84634761 0.87531807 0.89847716 0.89285714
|
|
0.89230769 0.90452261 0.85427136 0.87878788]
|
|
|
|
mean value: 0.8752108284351288
|
|
|
|
key: test_precision
|
|
value: [0.70833333 0.7037037 0.73913043 0.81818182 0.82608696 0.68421053
|
|
0.79166667 0.68965517 0.77272727 0.85 ]
|
|
|
|
mean value: 0.7583695884646725
|
|
|
|
key: train_precision
|
|
value: [0.83743842 0.83168317 0.82352941 0.86 0.88059701 0.88383838
|
|
0.8877551 0.88235294 0.83333333 0.86138614]
|
|
|
|
mean value: 0.8581913917655096
|
|
|
|
key: test_recall
|
|
value: [0.77272727 0.86363636 0.77272727 0.81818182 0.86363636 0.61904762
|
|
0.9047619 0.95238095 0.80952381 0.80952381]
|
|
|
|
mean value: 0.8186147186147186
|
|
|
|
key: train_recall
|
|
value: [0.88082902 0.87046632 0.87046632 0.89119171 0.91709845 0.90206186
|
|
0.89690722 0.92783505 0.87628866 0.89690722]
|
|
|
|
mean value: 0.8930051813471502
|
|
|
|
key: test_roc_auc
|
|
value: [0.69191919 0.69652406 0.70989305 0.79144385 0.81417112 0.64285714
|
|
0.81349206 0.72619048 0.76587302 0.82142857]
|
|
|
|
mean value: 0.7473792547321959
|
|
|
|
key: train_roc_auc
|
|
value: [0.83598413 0.82831492 0.82202561 0.85754554 0.88307752 0.87824612
|
|
0.87883336 0.88796816 0.83054939 0.85984601]
|
|
|
|
mean value: 0.856239076622146
|
|
|
|
key: test_jcc
|
|
value: [0.5862069 0.63333333 0.60714286 0.69230769 0.73076923 0.48148148
|
|
0.73076923 0.66666667 0.65384615 0.70833333]
|
|
|
|
mean value: 0.6490856876201704
|
|
|
|
key: train_jcc
|
|
value: [0.75221239 0.74008811 0.73362445 0.77828054 0.8156682 0.80645161
|
|
0.80555556 0.82568807 0.74561404 0.78378378]
|
|
|
|
mean value: 0.7786966755732057
|
|
|
|
MCC on Blind test: 0.45
|
|
|
|
Accuracy on Blind test: 0.73
|
|
|
|
Model_name: Gaussian NB
|
|
Model func: GaussianNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianNB())])
|
|
|
|
key: fit_time
|
|
value: [0.02437758 0.01368475 0.01371503 0.01373553 0.01398277 0.01400352
|
|
0.01363087 0.01404715 0.01381183 0.01371145]
|
|
|
|
mean value: 0.014870047569274902
|
|
|
|
key: score_time
|
|
value: [0.01235843 0.01252961 0.0123415 0.01239944 0.0125196 0.01226878
|
|
0.01242042 0.01238608 0.01226616 0.01235604]
|
|
|
|
mean value: 0.012384605407714844
|
|
|
|
key: test_mcc
|
|
value: [0.23466316 0.03656362 0.42012039 0.26162798 0.58501794 0.43085716
|
|
0.54870326 0.3474523 0.38575837 0.27777778]
|
|
|
|
mean value: 0.35285419652273936
|
|
|
|
key: train_mcc
|
|
value: [0.39819172 0.40075709 0.37914257 0.39405557 0.37165 0.41714552
|
|
0.35722079 0.40713425 0.38759372 0.38851052]
|
|
|
|
mean value: 0.39014017351285274
|
|
|
|
key: test_accuracy
|
|
value: [0.625 0.53846154 0.71794872 0.64102564 0.79487179 0.71794872
|
|
0.74358974 0.66666667 0.69230769 0.64102564]
|
|
|
|
mean value: 0.6778846153846154
|
|
|
|
key: train_accuracy
|
|
value: [0.7037037 0.70454545 0.69318182 0.69602273 0.69034091 0.71306818
|
|
0.68465909 0.70738636 0.69886364 0.69886364]
|
|
|
|
mean value: 0.6990635521885522
|
|
|
|
key: test_fscore
|
|
value: [0.71698113 0.625 0.76595745 0.69565217 0.83333333 0.74418605
|
|
0.80769231 0.74509804 0.75 0.66666667]
|
|
|
|
mean value: 0.7350567146216648
|
|
|
|
key: train_fscore
|
|
value: [0.75471698 0.75471698 0.75115207 0.76274945 0.74592075 0.76235294
|
|
0.74004684 0.76212471 0.75233645 0.75462963]
|
|
|
|
mean value: 0.7540746796722013
|
|
|
|
key: test_precision
|
|
value: [0.61290323 0.57692308 0.72 0.66666667 0.76923077 0.72727273
|
|
0.67741935 0.63333333 0.66666667 0.66666667]
|
|
|
|
mean value: 0.6717082487405068
|
|
|
|
key: train_precision
|
|
value: [0.69264069 0.69264069 0.67634855 0.66666667 0.6779661 0.7012987
|
|
0.67811159 0.69037657 0.68803419 0.68487395]
|
|
|
|
mean value: 0.684895769729402
|
|
|
|
key: test_recall
|
|
value: [0.86363636 0.68181818 0.81818182 0.72727273 0.90909091 0.76190476
|
|
1. 0.9047619 0.85714286 0.66666667]
|
|
|
|
mean value: 0.819047619047619
|
|
|
|
key: train_recall
|
|
value: [0.82901554 0.82901554 0.84455959 0.89119171 0.82901554 0.83505155
|
|
0.81443299 0.85051546 0.82989691 0.84020619]
|
|
|
|
mean value: 0.8392901020244645
|
|
|
|
key: test_roc_auc
|
|
value: [0.59848485 0.51737968 0.70320856 0.62834225 0.77807487 0.71428571
|
|
0.72222222 0.6468254 0.67857143 0.63888889]
|
|
|
|
mean value: 0.6626283846872082
|
|
|
|
key: train_roc_auc
|
|
value: [0.68982423 0.69123733 0.67699677 0.6751556 0.67551406 0.69917134
|
|
0.66987472 0.69108052 0.6839358 0.68276132]
|
|
|
|
mean value: 0.6835551696333612
|
|
|
|
key: test_jcc
|
|
value: [0.55882353 0.45454545 0.62068966 0.53333333 0.71428571 0.59259259
|
|
0.67741935 0.59375 0.6 0.5 ]
|
|
|
|
mean value: 0.5845439634179983
|
|
|
|
key: train_jcc
|
|
value: [0.60606061 0.60606061 0.60147601 0.61648746 0.59479554 0.61596958
|
|
0.58736059 0.61567164 0.60299625 0.60594796]
|
|
|
|
mean value: 0.6052826249519565
|
|
|
|
MCC on Blind test: 0.32
|
|
|
|
Accuracy on Blind test: 0.67
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01415777 0.01403832 0.01400423 0.01419544 0.0141027 0.01410246
|
|
0.01409817 0.01396275 0.01395226 0.01405287]
|
|
|
|
mean value: 0.014066696166992188
|
|
|
|
key: score_time
|
|
value: [0.01240993 0.01239276 0.01233053 0.01219869 0.01227164 0.01228476
|
|
0.01232052 0.01230741 0.01235628 0.01227283]
|
|
|
|
mean value: 0.012314534187316895
|
|
|
|
key: test_mcc
|
|
value: [0.12974982 0.06149733 0.28117601 0.44298485 0.5828877 0.43535772
|
|
0.64246755 0.37940161 0.53826045 0.27348302]
|
|
|
|
mean value: 0.37672660455215623
|
|
|
|
key: train_mcc
|
|
value: [0.49959039 0.53088207 0.49047512 0.48722902 0.49476397 0.47780376
|
|
0.46632656 0.47174736 0.44408311 0.50475435]
|
|
|
|
mean value: 0.4867655710273362
|
|
|
|
key: test_accuracy
|
|
value: [0.575 0.53846154 0.64102564 0.71794872 0.79487179 0.71794872
|
|
0.82051282 0.69230769 0.76923077 0.64102564]
|
|
|
|
mean value: 0.6908333333333333
|
|
|
|
key: train_accuracy
|
|
value: [0.75213675 0.76704545 0.74715909 0.74715909 0.75 0.74147727
|
|
0.73579545 0.73863636 0.72443182 0.75568182]
|
|
|
|
mean value: 0.7459523115773116
|
|
|
|
key: test_fscore
|
|
value: [0.63829787 0.59090909 0.66666667 0.73170732 0.81818182 0.73170732
|
|
0.84444444 0.73913043 0.7804878 0.68181818]
|
|
|
|
mean value: 0.7223350948167626
|
|
|
|
key: train_fscore
|
|
value: [0.77402597 0.78534031 0.76762402 0.77694236 0.77319588 0.76485788
|
|
0.75968992 0.7628866 0.74805195 0.78172589]
|
|
|
|
mean value: 0.7694340779160749
|
|
|
|
key: test_precision
|
|
value: [0.6 0.59090909 0.7 0.78947368 0.81818182 0.75
|
|
0.79166667 0.68 0.8 0.65217391]
|
|
|
|
mean value: 0.717240517301158
|
|
|
|
key: train_precision
|
|
value: [0.77604167 0.79365079 0.77368421 0.75242718 0.76923077 0.76683938
|
|
0.76165803 0.7628866 0.7539267 0.77 ]
|
|
|
|
mean value: 0.7680345333375814
|
|
|
|
key: test_recall
|
|
value: [0.68181818 0.59090909 0.63636364 0.68181818 0.81818182 0.71428571
|
|
0.9047619 0.80952381 0.76190476 0.71428571]
|
|
|
|
mean value: 0.7313852813852814
|
|
|
|
key: train_recall
|
|
value: [0.77202073 0.77720207 0.76165803 0.80310881 0.77720207 0.7628866
|
|
0.75773196 0.7628866 0.74226804 0.79381443]
|
|
|
|
mean value: 0.7710779338710538
|
|
|
|
key: test_roc_auc
|
|
value: [0.56313131 0.53074866 0.64171123 0.72326203 0.79144385 0.71825397
|
|
0.81349206 0.68253968 0.76984127 0.63492063]
|
|
|
|
mean value: 0.6869344707580002
|
|
|
|
key: train_roc_auc
|
|
value: [0.74993441 0.76595953 0.74560889 0.74117705 0.7470916 0.73903824
|
|
0.73329636 0.73587368 0.72239984 0.7513376 ]
|
|
|
|
mean value: 0.7431717191049403
|
|
|
|
key: test_jcc
|
|
value: [0.46875 0.41935484 0.5 0.57692308 0.69230769 0.57692308
|
|
0.73076923 0.5862069 0.64 0.51724138]
|
|
|
|
mean value: 0.5708476191494823
|
|
|
|
key: train_jcc
|
|
value: [0.63135593 0.64655172 0.62288136 0.6352459 0.6302521 0.61924686
|
|
0.6125 0.61666667 0.59751037 0.64166667]
|
|
|
|
mean value: 0.6253877583455207
|
|
|
|
MCC on Blind test: 0.4
|
|
|
|
Accuracy on Blind test: 0.7
|
|
|
|
Model_name: K-Nearest Neighbors
|
|
Model func: KNeighborsClassifier()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', KNeighborsClassifier())])
|
|
|
|
key: fit_time
|
|
value: [0.01332045 0.01391959 0.01815677 0.01369214 0.024189 0.02506042
|
|
0.03361917 0.0132997 0.01338696 0.013623 ]
|
|
|
|
mean value: 0.01822671890258789
|
|
|
|
key: score_time
|
|
value: [0.12917995 0.067523 0.04327273 0.0376215 0.04688621 0.05427647
|
|
0.0461905 0.04015064 0.05202198 0.05504966]
|
|
|
|
mean value: 0.05721726417541504
|
|
|
|
key: test_mcc
|
|
value: [ 0.12338338 -0.13789005 0.11968254 0.26162798 0.42319443 0.38095238
|
|
0.27217941 0.43102253 0.32530002 0.11968254]
|
|
|
|
mean value: 0.23191351778019928
|
|
|
|
key: train_mcc
|
|
value: [0.50865848 0.56255301 0.48765473 0.51619414 0.48124505 0.46286069
|
|
0.48106234 0.53884115 0.48041774 0.49810084]
|
|
|
|
mean value: 0.5017588175290625
|
|
|
|
key: test_accuracy
|
|
value: [0.575 0.46153846 0.56410256 0.64102564 0.71794872 0.69230769
|
|
0.64102564 0.71794872 0.66666667 0.56410256]
|
|
|
|
mean value: 0.6241666666666666
|
|
|
|
key: train_accuracy
|
|
value: [0.75783476 0.78409091 0.74715909 0.76136364 0.74431818 0.73579545
|
|
0.74431818 0.77272727 0.74431818 0.75284091]
|
|
|
|
mean value: 0.7544766576016576
|
|
|
|
key: test_fscore
|
|
value: [0.65306122 0.57142857 0.60465116 0.69565217 0.75555556 0.71428571
|
|
0.69565217 0.75555556 0.71111111 0.60465116]
|
|
|
|
mean value: 0.6761604405833787
|
|
|
|
key: train_fscore
|
|
value: [0.79115479 0.81 0.77468354 0.79207921 0.77722772 0.77372263
|
|
0.78365385 0.80392157 0.7804878 0.78832117]
|
|
|
|
mean value: 0.7875252281431442
|
|
|
|
key: test_precision
|
|
value: [0.59259259 0.51851852 0.61904762 0.66666667 0.73913043 0.71428571
|
|
0.64 0.70833333 0.66666667 0.59090909]
|
|
|
|
mean value: 0.6456150636802811
|
|
|
|
key: train_precision
|
|
value: [0.75233645 0.7826087 0.75742574 0.75829384 0.74407583 0.73271889
|
|
0.73423423 0.76635514 0.74074074 0.74654378]
|
|
|
|
mean value: 0.7515333343043958
|
|
|
|
key: test_recall
|
|
value: [0.72727273 0.63636364 0.59090909 0.72727273 0.77272727 0.71428571
|
|
0.76190476 0.80952381 0.76190476 0.61904762]
|
|
|
|
mean value: 0.7121212121212122
|
|
|
|
key: train_recall
|
|
value: [0.83419689 0.83937824 0.79274611 0.82901554 0.8134715 0.81958763
|
|
0.84020619 0.84536082 0.82474227 0.83505155]
|
|
|
|
mean value: 0.8273756743763687
|
|
|
|
key: test_roc_auc
|
|
value: [0.55808081 0.43582888 0.56016043 0.62834225 0.70989305 0.69047619
|
|
0.63095238 0.71031746 0.65873016 0.55952381]
|
|
|
|
mean value: 0.6142305407011289
|
|
|
|
key: train_roc_auc
|
|
value: [0.74937693 0.77817969 0.74228501 0.75413041 0.73692443 0.72624951
|
|
0.73339423 0.76445256 0.73515594 0.74347514]
|
|
|
|
mean value: 0.7463623853929452
|
|
|
|
key: test_jcc
|
|
value: [0.48484848 0.4 0.43333333 0.53333333 0.60714286 0.55555556
|
|
0.53333333 0.60714286 0.55172414 0.43333333]
|
|
|
|
mean value: 0.5139747225954122
|
|
|
|
key: train_jcc
|
|
value: [0.65447154 0.68067227 0.6322314 0.6557377 0.63562753 0.63095238
|
|
0.64426877 0.67213115 0.64 0.65060241]
|
|
|
|
mean value: 0.6496695166699569
|
|
|
|
MCC on Blind test: 0.2
|
|
|
|
Accuracy on Blind test: 0.61
|
|
|
|
Model_name: SVM
|
|
Model func: SVC(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SVC(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02586198 0.02531052 0.04850888 0.04493904 0.02481985 0.05120564
|
|
0.0254631 0.02536511 0.0249207 0.02555561]
|
|
|
|
mean value: 0.03219504356384277
|
|
|
|
key: score_time
|
|
value: [0.01615477 0.01599693 0.03579712 0.01573038 0.01562381 0.02733827
|
|
0.01584196 0.01563573 0.01561165 0.01565671]
|
|
|
|
mean value: 0.018938732147216798
|
|
|
|
key: test_mcc
|
|
value: [0.23071239 0.25338873 0.3180697 0.6383069 0.58501794 0.43535772
|
|
0.60864099 0.46953014 0.64246755 0.43102253]
|
|
|
|
mean value: 0.4612514587273892
|
|
|
|
key: train_mcc
|
|
value: [0.69587587 0.71302795 0.68537043 0.6397475 0.70815606 0.66173069
|
|
0.65952731 0.6904032 0.66753551 0.65311656]
|
|
|
|
mean value: 0.6774491102165359
|
|
|
|
key: test_accuracy
|
|
value: [0.625 0.64102564 0.66666667 0.82051282 0.79487179 0.71794872
|
|
0.79487179 0.71794872 0.82051282 0.71794872]
|
|
|
|
mean value: 0.7317307692307692
|
|
|
|
key: train_accuracy
|
|
value: [0.84900285 0.85795455 0.84375 0.82102273 0.85511364 0.82954545
|
|
0.82954545 0.84659091 0.83522727 0.82670455]
|
|
|
|
mean value: 0.8394457394457394
|
|
|
|
key: test_fscore
|
|
value: [0.70588235 0.72 0.71111111 0.8372093 0.83333333 0.73170732
|
|
0.83333333 0.78431373 0.84444444 0.75555556]
|
|
|
|
mean value: 0.7756890475607903
|
|
|
|
key: train_fscore
|
|
value: [0.8691358 0.87437186 0.86419753 0.84596577 0.87344913 0.85781991
|
|
0.85645933 0.86699507 0.85784314 0.85371703]
|
|
|
|
mean value: 0.8619954567196848
|
|
|
|
key: test_precision
|
|
value: [0.62068966 0.64285714 0.69565217 0.85714286 0.76923077 0.75
|
|
0.74074074 0.66666667 0.79166667 0.70833333]
|
|
|
|
mean value: 0.7242980005723634
|
|
|
|
key: train_precision
|
|
value: [0.83018868 0.84878049 0.8254717 0.80092593 0.83809524 0.79385965
|
|
0.79910714 0.83018868 0.81775701 0.79820628]
|
|
|
|
mean value: 0.8182580787782466
|
|
|
|
key: test_recall
|
|
value: [0.81818182 0.81818182 0.72727273 0.81818182 0.90909091 0.71428571
|
|
0.95238095 0.95238095 0.9047619 0.80952381]
|
|
|
|
mean value: 0.8424242424242424
|
|
|
|
key: train_recall
|
|
value: [0.9119171 0.9015544 0.90673575 0.89637306 0.9119171 0.93298969
|
|
0.92268041 0.90721649 0.90206186 0.91752577]
|
|
|
|
mean value: 0.9110971636130548
|
|
|
|
key: test_roc_auc
|
|
value: [0.60353535 0.61497326 0.65775401 0.82085561 0.77807487 0.71825397
|
|
0.78174603 0.6984127 0.81349206 0.71031746]
|
|
|
|
mean value: 0.7197415329768271
|
|
|
|
key: train_roc_auc
|
|
value: [0.8420345 0.85329293 0.83701567 0.8129664 0.84904031 0.81776067
|
|
0.81893514 0.8396842 0.82761321 0.81635782]
|
|
|
|
mean value: 0.83147008487157
|
|
|
|
key: test_jcc
|
|
value: [0.54545455 0.5625 0.55172414 0.72 0.71428571 0.57692308
|
|
0.71428571 0.64516129 0.73076923 0.60714286]
|
|
|
|
mean value: 0.6368246567114754
|
|
|
|
key: train_jcc
|
|
value: [0.76855895 0.77678571 0.76086957 0.73305085 0.7753304 0.75103734
|
|
0.74895397 0.76521739 0.75107296 0.74476987]
|
|
|
|
mean value: 0.7575647021850033
|
|
|
|
MCC on Blind test: 0.45
|
|
|
|
Accuracy on Blind test: 0.73
|
|
|
|
Model_name: MLP
|
|
Model func: MLPClassifier(max_iter=500, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MLPClassifier(max_iter=500, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [2.91022301 3.10206938 2.4169178 2.24846649 2.14688158 3.32444572
|
|
3.17221498 2.70555782 2.88934183 2.95270324]
|
|
|
|
mean value: 2.786882185935974
|
|
|
|
key: score_time
|
|
value: [0.02126956 0.05364895 0.02685308 0.01277757 0.03223562 0.05568361
|
|
0.01993418 0.02448583 0.0377295 0.02343059]
|
|
|
|
mean value: 0.03080484867095947
|
|
|
|
key: test_mcc
|
|
value: [0.28721348 0.3180697 0.04905525 0.63344389 0.63344389 0.24935216
|
|
0.58730159 0.38575837 0.39894181 0.53826045]
|
|
|
|
mean value: 0.40808405915621887
|
|
|
|
key: train_mcc
|
|
value: [0.98276047 0.98281969 0.96558803 0.97705869 0.95984626 0.97707007
|
|
0.97703249 0.96572357 0.98278047 0.97707007]
|
|
|
|
mean value: 0.9747749810147373
|
|
|
|
key: test_accuracy
|
|
value: [0.65 0.66666667 0.53846154 0.82051282 0.82051282 0.61538462
|
|
0.79487179 0.69230769 0.69230769 0.76923077]
|
|
|
|
mean value: 0.706025641025641
|
|
|
|
key: train_accuracy
|
|
value: [0.99145299 0.99147727 0.98295455 0.98863636 0.98011364 0.98863636
|
|
0.98863636 0.98295455 0.99147727 0.98863636]
|
|
|
|
mean value: 0.9874975718725718
|
|
|
|
key: test_fscore
|
|
value: [0.69565217 0.71111111 0.60869565 0.84444444 0.84444444 0.59459459
|
|
0.80952381 0.75 0.68421053 0.7804878 ]
|
|
|
|
mean value: 0.7323164561399199
|
|
|
|
key: train_fscore
|
|
value: [0.99220779 0.99220779 0.98445596 0.98963731 0.98191214 0.98974359
|
|
0.98969072 0.98469388 0.99228792 0.98974359]
|
|
|
|
mean value: 0.9886580689792606
|
|
|
|
key: test_precision
|
|
value: [0.66666667 0.69565217 0.58333333 0.82608696 0.82608696 0.6875
|
|
0.80952381 0.66666667 0.76470588 0.8 ]
|
|
|
|
mean value: 0.7326222445499939
|
|
|
|
key: train_precision
|
|
value: [0.99479167 0.99479167 0.98445596 0.98963731 0.97938144 0.98469388
|
|
0.98969072 0.97474747 0.98974359 0.98469388]
|
|
|
|
mean value: 0.9866627582123597
|
|
|
|
key: test_recall
|
|
value: [0.72727273 0.72727273 0.63636364 0.86363636 0.86363636 0.52380952
|
|
0.80952381 0.85714286 0.61904762 0.76190476]
|
|
|
|
mean value: 0.7389610389610389
|
|
|
|
key: train_recall
|
|
value: [0.98963731 0.98963731 0.98445596 0.98963731 0.98445596 0.99484536
|
|
0.98969072 0.99484536 0.99484536 0.99484536]
|
|
|
|
mean value: 0.9906895999145344
|
|
|
|
key: test_roc_auc
|
|
value: [0.64141414 0.65775401 0.52406417 0.81417112 0.81417112 0.62301587
|
|
0.79365079 0.67857143 0.6984127 0.76984127]
|
|
|
|
mean value: 0.7015066632713691
|
|
|
|
key: train_roc_auc
|
|
value: [0.9916541 0.991674 0.98279402 0.98852934 0.97964936 0.98792901
|
|
0.98851625 0.9815999 0.99109357 0.98792901]
|
|
|
|
mean value: 0.9871368547299765
|
|
|
|
key: test_jcc
|
|
value: [0.53333333 0.55172414 0.4375 0.73076923 0.73076923 0.42307692
|
|
0.68 0.6 0.52 0.64 ]
|
|
|
|
mean value: 0.5847172855879752
|
|
|
|
key: train_jcc
|
|
value: [0.98453608 0.98453608 0.96938776 0.97948718 0.96446701 0.97969543
|
|
0.97959184 0.96984925 0.98469388 0.97969543]
|
|
|
|
mean value: 0.9775939928074848
|
|
|
|
MCC on Blind test: 0.57
|
|
|
|
Accuracy on Blind test: 0.79
|
|
|
|
Model_name: Decision Tree
|
|
Model func: DecisionTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', DecisionTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.03906608 0.0292635 0.05773568 0.03004766 0.03187251 0.03294325
|
|
0.03356886 0.0385108 0.03023219 0.04849863]
|
|
|
|
mean value: 0.037173914909362796
|
|
|
|
key: score_time
|
|
value: [0.01301384 0.02037883 0.01303339 0.01304865 0.01284504 0.01281381
|
|
0.01273775 0.0128572 0.01312661 0.03527904]
|
|
|
|
mean value: 0.01591341495513916
|
|
|
|
key: test_mcc
|
|
value: [0.14591299 0.53458203 0.58048707 0.42319443 0.5828877 0.62620255
|
|
0.4866238 0.59384599 0.49076688 0.53674504]
|
|
|
|
mean value: 0.5001248476975677
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.575 0.76923077 0.79487179 0.71794872 0.79487179 0.79487179
|
|
0.74358974 0.79487179 0.74358974 0.76923077]
|
|
|
|
mean value: 0.7498076923076923
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.60465116 0.79069767 0.82608696 0.75555556 0.81818182 0.77777778
|
|
0.7826087 0.82608696 0.75 0.8 ]
|
|
|
|
mean value: 0.7731646597420107
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.61904762 0.80952381 0.79166667 0.73913043 0.81818182 0.93333333
|
|
0.72 0.76 0.78947368 0.75 ]
|
|
|
|
mean value: 0.7730357365746382
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.59090909 0.77272727 0.86363636 0.77272727 0.81818182 0.66666667
|
|
0.85714286 0.9047619 0.71428571 0.85714286]
|
|
|
|
mean value: 0.7818181818181819
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.57323232 0.76871658 0.78475936 0.70989305 0.79144385 0.80555556
|
|
0.73412698 0.78571429 0.74603175 0.76190476]
|
|
|
|
mean value: 0.7461378490790256
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.43333333 0.65384615 0.7037037 0.60714286 0.69230769 0.63636364
|
|
0.64285714 0.7037037 0.6 0.66666667]
|
|
|
|
mean value: 0.633992488992489
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.45
|
|
|
|
Accuracy on Blind test: 0.73
|
|
|
|
Model_name: Extra Trees
|
|
Model func: ExtraTreesClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreesClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.17244458 0.21761703 0.25768661 0.2097497 0.16652131 0.26184726
|
|
0.16725016 0.16593075 0.21627688 0.16753721]
|
|
|
|
mean value: 0.2002861499786377
|
|
|
|
key: score_time
|
|
value: [0.02551651 0.0255897 0.02410817 0.02407432 0.02434325 0.02443552
|
|
0.02443457 0.02423859 0.0243299 0.02459693]
|
|
|
|
mean value: 0.02456674575805664
|
|
|
|
key: test_mcc
|
|
value: [0.17545379 0.2045323 0.52791444 0.60639156 0.5828877 0.18205868
|
|
0.4866238 0.59160798 0.33245498 0.45848623]
|
|
|
|
mean value: 0.41484114591188126
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.6 0.61538462 0.76923077 0.79487179 0.79487179 0.58974359
|
|
0.74358974 0.76923077 0.66666667 0.71794872]
|
|
|
|
mean value: 0.7061538461538461
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.68 0.68085106 0.80851064 0.8 0.81818182 0.6
|
|
0.7826087 0.82352941 0.68292683 0.7027027 ]
|
|
|
|
mean value: 0.7379311159697353
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.60714286 0.64 0.76 0.88888889 0.81818182 0.63157895
|
|
0.72 0.7 0.7 0.8125 ]
|
|
|
|
mean value: 0.7278292511581985
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.77272727 0.72727273 0.86363636 0.72727273 0.81818182 0.57142857
|
|
0.85714286 1. 0.66666667 0.61904762]
|
|
|
|
mean value: 0.7623376623376623
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.58080808 0.59893048 0.75534759 0.80481283 0.79144385 0.59126984
|
|
0.73412698 0.75 0.66666667 0.72619048]
|
|
|
|
mean value: 0.6999596808420339
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.51515152 0.51612903 0.67857143 0.66666667 0.69230769 0.42857143
|
|
0.64285714 0.7 0.51851852 0.54166667]
|
|
|
|
mean value: 0.5900440091569124
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.45
|
|
|
|
Accuracy on Blind test: 0.73
|
|
|
|
Model_name: Extra Tree
|
|
Model func: ExtraTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01549053 0.01434684 0.01424766 0.01411343 0.01413631 0.02322936
|
|
0.01407123 0.01369977 0.01412392 0.01400304]
|
|
|
|
mean value: 0.015146207809448243
|
|
|
|
key: score_time
|
|
value: [0.01235962 0.01235962 0.01237845 0.01249194 0.01238823 0.03227663
|
|
0.01191926 0.01195526 0.01224375 0.01255798]
|
|
|
|
mean value: 0.014293074607849121
|
|
|
|
key: test_mcc
|
|
value: [ 0.18463724 0.26162798 0.28117601 0.22340742 0.32713229 0.43085716
|
|
0.04948717 -0.03174603 0.11385501 0.20331252]
|
|
|
|
mean value: 0.2043746744064925
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.6 0.64102564 0.64102564 0.61538462 0.66666667 0.71794872
|
|
0.51282051 0.48717949 0.56410256 0.58974359]
|
|
|
|
mean value: 0.6035897435897436
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.65217391 0.69565217 0.66666667 0.65116279 0.69767442 0.74418605
|
|
0.45714286 0.52380952 0.62222222 0.55555556]
|
|
|
|
mean value: 0.6266246168167301
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.625 0.66666667 0.7 0.66666667 0.71428571 0.72727273
|
|
0.57142857 0.52380952 0.58333333 0.66666667]
|
|
|
|
mean value: 0.644512987012987
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.68181818 0.72727273 0.63636364 0.63636364 0.68181818 0.76190476
|
|
0.38095238 0.52380952 0.66666667 0.47619048]
|
|
|
|
mean value: 0.6173160173160173
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.59090909 0.62834225 0.64171123 0.61229947 0.6644385 0.71428571
|
|
0.52380952 0.48412698 0.55555556 0.59920635]
|
|
|
|
mean value: 0.6014684661743485
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.48387097 0.53333333 0.5 0.48275862 0.53571429 0.59259259
|
|
0.2962963 0.35483871 0.4516129 0.38461538]
|
|
|
|
mean value: 0.4615633093886709
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.08
|
|
|
|
Accuracy on Blind test: 0.55
|
|
|
|
Model_name: Random Forest
|
|
Model func: RandomForestClassifier(n_estimators=1000, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(n_estimators=1000, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [2.69135904 2.70917821 2.36334229 2.38255072 2.58042264 1.61714864
|
|
1.72178507 1.6445508 1.63154292 1.63047242]
|
|
|
|
mean value: 2.0972352743148805
|
|
|
|
key: score_time
|
|
value: [0.14091563 0.12506819 0.12484097 0.21970868 0.09121585 0.0933814
|
|
0.0977962 0.09049273 0.09123826 0.09371853]
|
|
|
|
mean value: 0.11683764457702636
|
|
|
|
key: test_mcc
|
|
value: [0.33734954 0.42319443 0.52791444 0.56417112 0.5828877 0.41475753
|
|
0.53674504 0.59160798 0.54761905 0.65079365]
|
|
|
|
mean value: 0.5177040486022235
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.675 0.71794872 0.76923077 0.76923077 0.79487179 0.69230769
|
|
0.76923077 0.76923077 0.76923077 0.82051282]
|
|
|
|
mean value: 0.7546794871794872
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.72340426 0.75555556 0.80851064 0.76923077 0.81818182 0.66666667
|
|
0.8 0.82352941 0.76923077 0.82051282]
|
|
|
|
mean value: 0.7754822704760127
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.68 0.73913043 0.76 0.88235294 0.81818182 0.8
|
|
0.75 0.7 0.83333333 0.88888889]
|
|
|
|
mean value: 0.7851887416363119
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.77272727 0.77272727 0.86363636 0.68181818 0.81818182 0.57142857
|
|
0.85714286 1. 0.71428571 0.76190476]
|
|
|
|
mean value: 0.7813852813852814
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.66414141 0.70989305 0.75534759 0.78208556 0.79144385 0.70238095
|
|
0.76190476 0.75 0.77380952 0.82539683]
|
|
|
|
mean value: 0.7516403531109414
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.56666667 0.60714286 0.67857143 0.625 0.69230769 0.5
|
|
0.66666667 0.7 0.625 0.69565217]
|
|
|
|
mean value: 0.6357007485268354
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.45
|
|
|
|
Accuracy on Blind test: 0.73
|
|
|
|
Model_name: Random Forest2
|
|
Model func: RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'Z...05', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10,
|
|
oob_score=True, random_state=42))])
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
|
|
key: fit_time
|
|
value: [1.86898923 0.97265959 0.94574857 0.97016168 0.99996591 0.94538593
|
|
0.92937469 0.92096901 0.91902447 0.92951322]
|
|
|
|
mean value: 1.040179228782654
|
|
|
|
key: score_time
|
|
value: [0.20569086 0.22888517 0.16473842 0.17857552 0.1652112 0.16618681
|
|
0.1822207 0.23874497 0.18768191 0.16507244]
|
|
|
|
mean value: 0.18830080032348634
|
|
|
|
key: test_mcc
|
|
value: [0.4411494 0.68677344 0.52791444 0.50266669 0.68677344 0.52048004
|
|
0.59384599 0.63427033 0.49076688 0.64116318]
|
|
|
|
mean value: 0.5725803825612074
|
|
|
|
key: train_mcc
|
|
value: [0.89658633 0.86227439 0.88013971 0.86240727 0.88543883 0.87963958
|
|
0.86809902 0.89152861 0.88512667 0.88528567]
|
|
|
|
mean value: 0.8796526074485547
|
|
|
|
key: test_accuracy
|
|
value: [0.725 0.84615385 0.76923077 0.74358974 0.84615385 0.74358974
|
|
0.79487179 0.79487179 0.74358974 0.82051282]
|
|
|
|
mean value: 0.7827564102564103
|
|
|
|
key: train_accuracy
|
|
value: [0.94871795 0.93181818 0.94034091 0.93181818 0.94318182 0.94034091
|
|
0.93465909 0.94602273 0.94318182 0.94318182]
|
|
|
|
mean value: 0.9403263403263403
|
|
|
|
key: test_fscore
|
|
value: [0.76595745 0.86956522 0.80851064 0.75 0.86956522 0.72222222
|
|
0.82608696 0.84 0.75 0.82926829]
|
|
|
|
mean value: 0.803117599131588
|
|
|
|
key: train_fscore
|
|
value: [0.95408163 0.93846154 0.94683544 0.93877551 0.94897959 0.94683544
|
|
0.94177215 0.95214106 0.94897959 0.94923858]
|
|
|
|
mean value: 0.9466100539581546
|
|
|
|
key: test_precision
|
|
value: [0.72 0.83333333 0.76 0.83333333 0.83333333 0.86666667
|
|
0.76 0.72413793 0.78947368 0.85 ]
|
|
|
|
mean value: 0.7970278281911676
|
|
|
|
key: train_precision
|
|
value: [0.93969849 0.92893401 0.92574257 0.92462312 0.93467337 0.93034826
|
|
0.92537313 0.93103448 0.93939394 0.935 ]
|
|
|
|
mean value: 0.9314821374471468
|
|
|
|
key: test_recall
|
|
value: [0.81818182 0.90909091 0.86363636 0.68181818 0.90909091 0.61904762
|
|
0.9047619 1. 0.71428571 0.80952381]
|
|
|
|
mean value: 0.822943722943723
|
|
|
|
key: train_recall
|
|
value: [0.96891192 0.94818653 0.96891192 0.95336788 0.96373057 0.96391753
|
|
0.95876289 0.9742268 0.95876289 0.96391753]
|
|
|
|
mean value: 0.9622696437156135
|
|
|
|
key: test_roc_auc
|
|
value: [0.71464646 0.8368984 0.75534759 0.7526738 0.8368984 0.75396825
|
|
0.78571429 0.77777778 0.74603175 0.82142857]
|
|
|
|
mean value: 0.7781385281385281
|
|
|
|
key: train_roc_auc
|
|
value: [0.94648128 0.93006811 0.93728615 0.92951413 0.94098478 0.93765497
|
|
0.93191309 0.9428096 0.94140676 0.94081952]
|
|
|
|
mean value: 0.9378938378597175
|
|
|
|
key: test_jcc
|
|
value: [0.62068966 0.76923077 0.67857143 0.6 0.76923077 0.56521739
|
|
0.7037037 0.72413793 0.6 0.70833333]
|
|
|
|
mean value: 0.6739114981581249
|
|
|
|
key: train_jcc
|
|
value: [0.91219512 0.88405797 0.89903846 0.88461538 0.90291262 0.89903846
|
|
0.88995215 0.90865385 0.90291262 0.90338164]
|
|
|
|
mean value: 0.8986758285152439
|
|
|
|
MCC on Blind test: 0.51
|
|
|
|
Accuracy on Blind test: 0.76
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.0247333 0.00965047 0.00976729 0.00986314 0.00958753 0.00968504
|
|
0.01000524 0.00972891 0.00969481 0.00996637]
|
|
|
|
mean value: 0.011268210411071778
|
|
|
|
key: score_time
|
|
value: [0.0093112 0.00866818 0.00882983 0.00918913 0.0087862 0.00873756
|
|
0.00918365 0.00884771 0.00886369 0.0089376 ]
|
|
|
|
mean value: 0.00893547534942627
|
|
|
|
key: test_mcc
|
|
value: [0.12974982 0.06149733 0.28117601 0.44298485 0.5828877 0.43535772
|
|
0.64246755 0.37940161 0.53826045 0.27348302]
|
|
|
|
mean value: 0.37672660455215623
|
|
|
|
key: train_mcc
|
|
value: [0.49959039 0.53088207 0.49047512 0.48722902 0.49476397 0.47780376
|
|
0.46632656 0.47174736 0.44408311 0.50475435]
|
|
|
|
mean value: 0.4867655710273362
|
|
|
|
key: test_accuracy
|
|
value: [0.575 0.53846154 0.64102564 0.71794872 0.79487179 0.71794872
|
|
0.82051282 0.69230769 0.76923077 0.64102564]
|
|
|
|
mean value: 0.6908333333333333
|
|
|
|
key: train_accuracy
|
|
value: [0.75213675 0.76704545 0.74715909 0.74715909 0.75 0.74147727
|
|
0.73579545 0.73863636 0.72443182 0.75568182]
|
|
|
|
mean value: 0.7459523115773116
|
|
|
|
key: test_fscore
|
|
value: [0.63829787 0.59090909 0.66666667 0.73170732 0.81818182 0.73170732
|
|
0.84444444 0.73913043 0.7804878 0.68181818]
|
|
|
|
mean value: 0.7223350948167626
|
|
|
|
key: train_fscore
|
|
value: [0.77402597 0.78534031 0.76762402 0.77694236 0.77319588 0.76485788
|
|
0.75968992 0.7628866 0.74805195 0.78172589]
|
|
|
|
mean value: 0.7694340779160749
|
|
|
|
key: test_precision
|
|
value: [0.6 0.59090909 0.7 0.78947368 0.81818182 0.75
|
|
0.79166667 0.68 0.8 0.65217391]
|
|
|
|
mean value: 0.717240517301158
|
|
|
|
key: train_precision
|
|
value: [0.77604167 0.79365079 0.77368421 0.75242718 0.76923077 0.76683938
|
|
0.76165803 0.7628866 0.7539267 0.77 ]
|
|
|
|
mean value: 0.7680345333375814
|
|
|
|
key: test_recall
|
|
value: [0.68181818 0.59090909 0.63636364 0.68181818 0.81818182 0.71428571
|
|
0.9047619 0.80952381 0.76190476 0.71428571]
|
|
|
|
mean value: 0.7313852813852814
|
|
|
|
key: train_recall
|
|
value: [0.77202073 0.77720207 0.76165803 0.80310881 0.77720207 0.7628866
|
|
0.75773196 0.7628866 0.74226804 0.79381443]
|
|
|
|
mean value: 0.7710779338710538
|
|
|
|
key: test_roc_auc
|
|
value: [0.56313131 0.53074866 0.64171123 0.72326203 0.79144385 0.71825397
|
|
0.81349206 0.68253968 0.76984127 0.63492063]
|
|
|
|
mean value: 0.6869344707580002
|
|
|
|
key: train_roc_auc
|
|
value: [0.74993441 0.76595953 0.74560889 0.74117705 0.7470916 0.73903824
|
|
0.73329636 0.73587368 0.72239984 0.7513376 ]
|
|
|
|
mean value: 0.7431717191049403
|
|
|
|
key: test_jcc
|
|
value: [0.46875 0.41935484 0.5 0.57692308 0.69230769 0.57692308
|
|
0.73076923 0.5862069 0.64 0.51724138]
|
|
|
|
mean value: 0.5708476191494823
|
|
|
|
key: train_jcc
|
|
value: [0.63135593 0.64655172 0.62288136 0.6352459 0.6302521 0.61924686
|
|
0.6125 0.61666667 0.59751037 0.64166667]
|
|
|
|
mean value: 0.6253877583455207
|
|
|
|
MCC on Blind test: 0.4
|
|
|
|
Accuracy on Blind test: 0.7
|
|
|
|
Model_name: XGBoost
|
|
Model func: XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'Z...
|
|
interaction_constraints=None, learning_rate=None,
|
|
max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan,
|
|
monotone_constraints=None, n_estimators=100,
|
|
n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None,
|
|
reg_lambda=None, scale_pos_weight=None,
|
|
subsample=None, tree_method=None,
|
|
use_label_encoder=False,
|
|
validate_parameters=None, verbosity=0))])
|
|
|
|
key: fit_time
|
|
value: [2.41412258 1.95130968 0.16859365 0.86095262 0.63910151 0.23897123
|
|
1.51896191 0.15470123 0.44391131 1.22929597]
|
|
|
|
mean value: 0.9619921684265137
|
|
|
|
key: score_time
|
|
value: [0.01328516 0.01376867 0.01158524 0.02055979 0.0114162 0.01436687
|
|
0.01219177 0.01202273 0.01257467 0.01273799]
|
|
|
|
mean value: 0.013450908660888671
|
|
|
|
key: test_mcc
|
|
value: [0.5959596 0.59153067 0.74350254 0.59153067 0.68716578 0.56305327
|
|
0.70106818 0.7200823 0.56305327 0.74203177]
|
|
|
|
mean value: 0.6498978041274719
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.8 0.79487179 0.87179487 0.79487179 0.84615385 0.76923077
|
|
0.84615385 0.84615385 0.76923077 0.87179487]
|
|
|
|
mean value: 0.821025641025641
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.81818182 0.80952381 0.89361702 0.80952381 0.86363636 0.75675676
|
|
0.86956522 0.875 0.75675676 0.88372093]
|
|
|
|
mean value: 0.8336282483279773
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.81818182 0.85 0.84 0.85 0.86363636 0.875
|
|
0.8 0.77777778 0.875 0.86363636]
|
|
|
|
mean value: 0.8413232323232324
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.81818182 0.77272727 0.95454545 0.77272727 0.86363636 0.66666667
|
|
0.95238095 1. 0.66666667 0.9047619 ]
|
|
|
|
mean value: 0.8372294372294372
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.7979798 0.79812834 0.85962567 0.79812834 0.84358289 0.77777778
|
|
0.83730159 0.83333333 0.77777778 0.86904762]
|
|
|
|
mean value: 0.8192683133859604
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.69230769 0.68 0.80769231 0.68 0.76 0.60869565
|
|
0.76923077 0.77777778 0.60869565 0.79166667]
|
|
|
|
mean value: 0.717606651802304
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.7
|
|
|
|
Accuracy on Blind test: 0.85
|
|
|
|
Model_name: LDA
|
|
Model func: LinearDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LinearDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.05918598 0.08959985 0.13512325 0.04787421 0.03671384 0.12405276
|
|
0.09519744 0.09040046 0.09243035 0.09333897]
|
|
|
|
mean value: 0.08639171123504638
|
|
|
|
key: score_time
|
|
value: [0.03301191 0.02424884 0.03447104 0.01281452 0.0169549 0.03200459
|
|
0.033674 0.016186 0.02198958 0.02257538]
|
|
|
|
mean value: 0.024793076515197753
|
|
|
|
key: test_mcc
|
|
value: [0.49236596 0.04905525 0.19149207 0.52791444 0.47420071 0.17460317
|
|
0.54554473 0.49719968 0.44444444 0.56305327]
|
|
|
|
mean value: 0.3959873728319808
|
|
|
|
key: train_mcc
|
|
value: [0.79270453 0.75889909 0.79327937 0.74724952 0.79904804 0.81626246
|
|
0.77032494 0.76998824 0.80452977 0.76428139]
|
|
|
|
mean value: 0.7816567362861676
|
|
|
|
key: test_accuracy
|
|
value: [0.75 0.53846154 0.58974359 0.76923077 0.74358974 0.58974359
|
|
0.76923077 0.74358974 0.71794872 0.76923077]
|
|
|
|
mean value: 0.698076923076923
|
|
|
|
key: train_accuracy
|
|
value: [0.8974359 0.88068182 0.89772727 0.875 0.90056818 0.90909091
|
|
0.88636364 0.88636364 0.90340909 0.88352273]
|
|
|
|
mean value: 0.892016317016317
|
|
|
|
key: test_fscore
|
|
value: [0.7826087 0.60869565 0.6 0.80851064 0.7826087 0.61904762
|
|
0.80851064 0.79166667 0.71794872 0.75675676]
|
|
|
|
mean value: 0.7276354080493765
|
|
|
|
key: train_fscore
|
|
value: [0.90862944 0.89175258 0.90769231 0.8877551 0.91002571 0.91919192
|
|
0.89690722 0.89795918 0.91326531 0.89514066]
|
|
|
|
mean value: 0.902831942606227
|
|
|
|
key: test_precision
|
|
value: [0.75 0.58333333 0.66666667 0.76 0.75 0.61904762
|
|
0.73076923 0.7037037 0.77777778 0.875 ]
|
|
|
|
mean value: 0.7216298331298331
|
|
|
|
key: train_precision
|
|
value: [0.89054726 0.88717949 0.89847716 0.87437186 0.90306122 0.9009901
|
|
0.89690722 0.88888889 0.9040404 0.88832487]
|
|
|
|
mean value: 0.893278847353825
|
|
|
|
key: test_recall
|
|
value: [0.81818182 0.63636364 0.54545455 0.86363636 0.81818182 0.61904762
|
|
0.9047619 0.9047619 0.66666667 0.66666667]
|
|
|
|
mean value: 0.7443722943722944
|
|
|
|
key: train_recall
|
|
value: [0.92746114 0.89637306 0.91709845 0.9015544 0.91709845 0.93814433
|
|
0.89690722 0.90721649 0.92268041 0.90206186]
|
|
|
|
mean value: 0.912659580150633
|
|
|
|
key: test_roc_auc
|
|
value: [0.74242424 0.52406417 0.59625668 0.75534759 0.73262032 0.58730159
|
|
0.75793651 0.73015873 0.72222222 0.77777778]
|
|
|
|
mean value: 0.6926109837874543
|
|
|
|
key: train_roc_auc
|
|
value: [0.89411032 0.87900414 0.89565614 0.87216085 0.8988008 0.90578103
|
|
0.88516247 0.88398799 0.90121362 0.88141067]
|
|
|
|
mean value: 0.8897288028927673
|
|
|
|
key: test_jcc
|
|
value: [0.64285714 0.4375 0.42857143 0.67857143 0.64285714 0.44827586
|
|
0.67857143 0.65517241 0.56 0.60869565]
|
|
|
|
mean value: 0.5781072499464553
|
|
|
|
key: train_jcc
|
|
value: [0.83255814 0.80465116 0.83098592 0.79816514 0.83490566 0.85046729
|
|
0.81308411 0.81481481 0.84037559 0.81018519]
|
|
|
|
mean value: 0.8230193004534195
|
|
|
|
MCC on Blind test: 0.33
|
|
|
|
Accuracy on Blind test: 0.67
|
|
|
|
Model_name: Multinomial
|
|
Model func: MultinomialNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MultinomialNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01592779 0.01515031 0.01602578 0.01546907 0.01665211 0.01538968
|
|
0.01587939 0.01483941 0.0104537 0.01027417]
|
|
|
|
mean value: 0.014606142044067382
|
|
|
|
key: score_time
|
|
value: [0.01331186 0.01377368 0.0143106 0.01406026 0.01515913 0.01358914
|
|
0.01425743 0.01117134 0.0091939 0.00872111]
|
|
|
|
mean value: 0.01275484561920166
|
|
|
|
key: test_mcc
|
|
value: [0.23071239 0.15534161 0.42319443 0.32713229 0.58048707 0.43535772
|
|
0.46953014 0.38575837 0.37805005 0.34126984]
|
|
|
|
mean value: 0.3726833915774863
|
|
|
|
key: train_mcc
|
|
value: [0.43234492 0.44054161 0.39960965 0.42287405 0.40527156 0.39251133
|
|
0.41594087 0.42184628 0.40408608 0.41606682]
|
|
|
|
mean value: 0.4151093163669143
|
|
|
|
key: test_accuracy
|
|
value: [0.625 0.58974359 0.71794872 0.66666667 0.79487179 0.71794872
|
|
0.71794872 0.69230769 0.69230769 0.66666667]
|
|
|
|
mean value: 0.6881410256410256
|
|
|
|
key: train_accuracy
|
|
value: [0.72079772 0.72443182 0.70454545 0.71590909 0.70738636 0.70170455
|
|
0.71306818 0.71590909 0.70738636 0.71306818]
|
|
|
|
mean value: 0.7124206811706811
|
|
|
|
key: test_fscore
|
|
value: [0.70588235 0.65217391 0.75555556 0.69767442 0.82608696 0.73170732
|
|
0.78431373 0.75 0.72727273 0.66666667]
|
|
|
|
mean value: 0.7297333633169361
|
|
|
|
key: train_fscore
|
|
value: [0.75980392 0.76399027 0.75 0.75609756 0.74939173 0.74327628
|
|
0.75305623 0.75490196 0.75060533 0.75184275]
|
|
|
|
mean value: 0.7532966035519044
|
|
|
|
key: test_precision
|
|
value: [0.62068966 0.625 0.73913043 0.71428571 0.79166667 0.75
|
|
0.66666667 0.66666667 0.69565217 0.72222222]
|
|
|
|
mean value: 0.6991980200376002
|
|
|
|
key: train_precision
|
|
value: [0.72093023 0.72018349 0.69955157 0.71428571 0.70642202 0.70697674
|
|
0.71627907 0.71962617 0.70776256 0.71830986]
|
|
|
|
mean value: 0.7130327419348079
|
|
|
|
key: test_recall
|
|
value: [0.81818182 0.68181818 0.77272727 0.68181818 0.86363636 0.71428571
|
|
0.95238095 0.85714286 0.76190476 0.61904762]
|
|
|
|
mean value: 0.7722943722943723
|
|
|
|
key: train_recall
|
|
value: [0.80310881 0.8134715 0.80829016 0.80310881 0.79792746 0.78350515
|
|
0.79381443 0.79381443 0.79896907 0.78865979]
|
|
|
|
mean value: 0.7984669622349233
|
|
|
|
key: test_roc_auc
|
|
value: [0.60353535 0.57620321 0.70989305 0.6644385 0.78475936 0.71825397
|
|
0.6984127 0.67857143 0.68650794 0.67063492]
|
|
|
|
mean value: 0.6791210423563364
|
|
|
|
key: train_roc_auc
|
|
value: [0.71168099 0.71491185 0.69345325 0.70658585 0.69770587 0.69238549
|
|
0.70386924 0.7070338 0.69695289 0.70445648]
|
|
|
|
mean value: 0.702903571078452
|
|
|
|
key: test_jcc
|
|
value: [0.54545455 0.48387097 0.60714286 0.53571429 0.7037037 0.57692308
|
|
0.64516129 0.6 0.57142857 0.5 ]
|
|
|
|
mean value: 0.5769399298431557
|
|
|
|
key: train_jcc
|
|
value: [0.61264822 0.61811024 0.6 0.60784314 0.59922179 0.59143969
|
|
0.60392157 0.60629921 0.60077519 0.6023622 ]
|
|
|
|
mean value: 0.6042621253167205
|
|
|
|
MCC on Blind test: 0.26
|
|
|
|
Accuracy on Blind test: 0.64
|
|
|
|
Model_name: Passive Aggresive
|
|
Model func: PassiveAggressiveClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
PassiveAggressiveClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01936674 0.01978564 0.01572657 0.01591492 0.01743746 0.01959467
|
|
0.01773 0.0185678 0.01736331 0.01891422]
|
|
|
|
mean value: 0.01804013252258301
|
|
|
|
key: score_time
|
|
value: [0.00950718 0.01122332 0.01133323 0.01174617 0.01184201 0.01192522
|
|
0.01185989 0.01185942 0.01192927 0.01191807]
|
|
|
|
mean value: 0.011514377593994141
|
|
|
|
key: test_mcc
|
|
value: [0.20100756 0.43117497 0.2266439 0.5464364 0.6947088 0.33410548
|
|
0.64246755 0.16891598 0.47837594 0.54554473]
|
|
|
|
mean value: 0.426938130250252
|
|
|
|
key: train_mcc
|
|
value: [0.45569532 0.71555702 0.53425074 0.65630427 0.68921205 0.64990271
|
|
0.68603947 0.68075596 0.65541085 0.57746071]
|
|
|
|
mean value: 0.6300589116802254
|
|
|
|
key: test_accuracy
|
|
value: [0.6 0.71794872 0.58974359 0.76923077 0.84615385 0.66666667
|
|
0.82051282 0.58974359 0.71794872 0.76923077]
|
|
|
|
mean value: 0.7087179487179487
|
|
|
|
key: train_accuracy
|
|
value: [0.6951567 0.85795455 0.71590909 0.82954545 0.84090909 0.81534091
|
|
0.84375 0.82954545 0.8125 0.76704545]
|
|
|
|
mean value: 0.8007656695156695
|
|
|
|
key: test_fscore
|
|
value: [0.72413793 0.78431373 0.55555556 0.7804878 0.875 0.73469388
|
|
0.84444444 0.63636364 0.68571429 0.80851064]
|
|
|
|
mean value: 0.7429221899329542
|
|
|
|
key: train_fscore
|
|
value: [0.78296146 0.87745098 0.65517241 0.84375 0.86792453 0.85327314
|
|
0.85564304 0.82758621 0.80588235 0.82478632]
|
|
|
|
mean value: 0.8194430449874387
|
|
|
|
key: test_precision
|
|
value: [0.58333333 0.68965517 0.71428571 0.84210526 0.80769231 0.64285714
|
|
0.79166667 0.60869565 0.85714286 0.73076923]
|
|
|
|
mean value: 0.7268203340492854
|
|
|
|
key: train_precision
|
|
value: [0.64333333 0.83255814 0.97938144 0.84816754 0.7965368 0.75903614
|
|
0.87165775 0.93506494 0.93835616 0.70437956]
|
|
|
|
mean value: 0.8308471812052299
|
|
|
|
key: test_recall
|
|
value: [0.95454545 0.90909091 0.45454545 0.72727273 0.95454545 0.85714286
|
|
0.9047619 0.66666667 0.57142857 0.9047619 ]
|
|
|
|
mean value: 0.7904761904761904
|
|
|
|
key: train_recall
|
|
value: [1. 0.92746114 0.49222798 0.83937824 0.95336788 0.9742268
|
|
0.84020619 0.74226804 0.70618557 0.99484536]
|
|
|
|
mean value: 0.8470167191923508
|
|
|
|
key: test_roc_auc
|
|
value: [0.56060606 0.68983957 0.60962567 0.77540107 0.8302139 0.65079365
|
|
0.81349206 0.58333333 0.73015873 0.75793651]
|
|
|
|
mean value: 0.7001400560224089
|
|
|
|
key: train_roc_auc
|
|
value: [0.66139241 0.85052302 0.73982468 0.82849415 0.8288852 0.79723998
|
|
0.84415373 0.83948845 0.82461177 0.74109357]
|
|
|
|
mean value: 0.7955706953974652
|
|
|
|
key: test_jcc
|
|
value: [0.56756757 0.64516129 0.38461538 0.64 0.77777778 0.58064516
|
|
0.73076923 0.46666667 0.52173913 0.67857143]
|
|
|
|
mean value: 0.5993513638015742
|
|
|
|
key: train_jcc
|
|
value: [0.64333333 0.78165939 0.48717949 0.72972973 0.76666667 0.74409449
|
|
0.74770642 0.70588235 0.67487685 0.70181818]
|
|
|
|
mean value: 0.6982946897812828
|
|
|
|
MCC on Blind test: 0.44
|
|
|
|
Accuracy on Blind test: 0.7
|
|
|
|
Model_name: Stochastic GDescent
|
|
Model func: SGDClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SGDClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02165914 0.03685188 0.02395153 0.01961398 0.01859236 0.02534246
|
|
0.02067137 0.0190196 0.02112269 0.01930928]
|
|
|
|
mean value: 0.02261343002319336
|
|
|
|
key: score_time
|
|
value: [0.01186466 0.01319265 0.01339817 0.01180243 0.0114572 0.01194096
|
|
0.0117588 0.01214409 0.01232433 0.01206565]
|
|
|
|
mean value: 0.01219489574432373
|
|
|
|
key: test_mcc
|
|
value: [0.32824398 0.32178399 0.2513369 0.63570849 0.39777409 0.37115374
|
|
0.54554473 0.54870326 0.34293954 0.64116318]
|
|
|
|
mean value: 0.43843519028638167
|
|
|
|
key: train_mcc
|
|
value: [0.62855308 0.72812128 0.65071816 0.70660114 0.39479175 0.67705292
|
|
0.73872051 0.75859297 0.49184251 0.68963745]
|
|
|
|
mean value: 0.6464631763631339
|
|
|
|
key: test_accuracy
|
|
value: [0.65 0.66666667 0.61538462 0.82051282 0.64102564 0.66666667
|
|
0.76923077 0.74358974 0.61538462 0.82051282]
|
|
|
|
mean value: 0.7008974358974359
|
|
|
|
key: train_accuracy
|
|
value: [0.78347578 0.85511364 0.80965909 0.84659091 0.61079545 0.83806818
|
|
0.86647727 0.87784091 0.68181818 0.84659091]
|
|
|
|
mean value: 0.8016430328930328
|
|
|
|
key: test_fscore
|
|
value: [0.63157895 0.75471698 0.61538462 0.85106383 0.5625 0.62857143
|
|
0.80851064 0.80769231 0.48275862 0.82926829]
|
|
|
|
mean value: 0.6972045661606536
|
|
|
|
key: train_fscore
|
|
value: [0.75949367 0.88221709 0.80118694 0.8744186 0.4497992 0.848
|
|
0.88836105 0.89638554 0.5971223 0.86567164]
|
|
|
|
mean value: 0.7862656037262483
|
|
|
|
key: test_precision
|
|
value: [0.75 0.64516129 0.70588235 0.8 0.9 0.78571429
|
|
0.73076923 0.67741935 0.875 0.85 ]
|
|
|
|
mean value: 0.7719946514585984
|
|
|
|
key: train_precision
|
|
value: [0.97560976 0.79583333 0.9375 0.79324895 1. 0.87845304
|
|
0.82378855 0.84162896 0.98809524 0.83653846]
|
|
|
|
mean value: 0.8870696278417831
|
|
|
|
key: test_recall
|
|
value: [0.54545455 0.90909091 0.54545455 0.90909091 0.40909091 0.52380952
|
|
0.9047619 1. 0.33333333 0.80952381]
|
|
|
|
mean value: 0.688961038961039
|
|
|
|
key: train_recall
|
|
value: [0.62176166 0.98963731 0.69948187 0.97409326 0.29015544 0.81958763
|
|
0.96391753 0.95876289 0.42783505 0.89690722]
|
|
|
|
mean value: 0.7642139842957107
|
|
|
|
key: test_roc_auc
|
|
value: [0.66161616 0.63101604 0.62566845 0.80748663 0.67513369 0.67857143
|
|
0.75793651 0.72222222 0.63888889 0.82142857]
|
|
|
|
mean value: 0.7019968593498005
|
|
|
|
key: train_roc_auc
|
|
value: [0.80138716 0.8407306 0.82143905 0.83295858 0.64507772 0.84017356
|
|
0.85537648 0.86862195 0.71075297 0.84085867]
|
|
|
|
mean value: 0.8057376744183752
|
|
|
|
key: test_jcc
|
|
value: [0.46153846 0.60606061 0.44444444 0.74074074 0.39130435 0.45833333
|
|
0.67857143 0.67741935 0.31818182 0.70833333]
|
|
|
|
mean value: 0.5484927868868963
|
|
|
|
key: train_jcc
|
|
value: [0.6122449 0.7892562 0.66831683 0.7768595 0.29015544 0.73611111
|
|
0.7991453 0.81222707 0.42564103 0.76315789]
|
|
|
|
mean value: 0.6673115277406285
|
|
|
|
MCC on Blind test: 0.29
|
|
|
|
Accuracy on Blind test: 0.64
|
|
|
|
Model_name: AdaBoost Classifier
|
|
Model func: AdaBoostClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', AdaBoostClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.18501496 0.22413325 0.14739823 0.14828968 0.1485734 0.15752697
|
|
0.16709542 0.14933062 0.17041063 0.21680474]
|
|
|
|
mean value: 0.17145779132843017
|
|
|
|
key: score_time
|
|
value: [0.01990271 0.02225757 0.01513314 0.01543474 0.01512551 0.01812315
|
|
0.01513577 0.01510811 0.0234859 0.01809883]
|
|
|
|
mean value: 0.017780542373657227
|
|
|
|
key: test_mcc
|
|
value: [0.49236596 0.5464364 0.52831916 0.55002604 0.6947088 0.38786415
|
|
0.74819006 0.44840472 0.44444444 0.6383069 ]
|
|
|
|
mean value: 0.5479066635637294
|
|
|
|
key: train_mcc
|
|
value: [0.97697908 0.93712371 0.94863098 0.93126941 0.95411738 0.93686739
|
|
0.94836906 0.95447318 0.93176511 0.93118204]
|
|
|
|
mean value: 0.9450777327437905
|
|
|
|
key: test_accuracy
|
|
value: [0.75 0.76923077 0.76923077 0.74358974 0.84615385 0.69230769
|
|
0.87179487 0.71794872 0.71794872 0.82051282]
|
|
|
|
mean value: 0.7698717948717949
|
|
|
|
key: train_accuracy
|
|
value: [0.98860399 0.96875 0.97443182 0.96590909 0.97727273 0.96875
|
|
0.97443182 0.97727273 0.96590909 0.96590909]
|
|
|
|
mean value: 0.9727240352240353
|
|
|
|
key: test_fscore
|
|
value: [0.7826087 0.7804878 0.8 0.72222222 0.875 0.7
|
|
0.88888889 0.7755102 0.71794872 0.8372093 ]
|
|
|
|
mean value: 0.7879875835997265
|
|
|
|
key: train_fscore
|
|
value: [0.98963731 0.97186701 0.9769821 0.96923077 0.97927461 0.97186701
|
|
0.97674419 0.97969543 0.96969697 0.96938776]
|
|
|
|
mean value: 0.9754383141178787
|
|
|
|
key: test_precision
|
|
value: [0.75 0.84210526 0.7826087 0.92857143 0.80769231 0.73684211
|
|
0.83333333 0.67857143 0.77777778 0.81818182]
|
|
|
|
mean value: 0.795568415820132
|
|
|
|
key: train_precision
|
|
value: [0.98963731 0.95959596 0.96464646 0.95939086 0.97927461 0.96446701
|
|
0.97927461 0.965 0.95049505 0.95959596]
|
|
|
|
mean value: 0.9671377829861048
|
|
|
|
key: test_recall
|
|
value: [0.81818182 0.72727273 0.81818182 0.59090909 0.95454545 0.66666667
|
|
0.95238095 0.9047619 0.66666667 0.85714286]
|
|
|
|
mean value: 0.7956709956709956
|
|
|
|
key: train_recall
|
|
value: [0.98963731 0.98445596 0.98963731 0.97927461 0.97927461 0.97938144
|
|
0.9742268 0.99484536 0.98969072 0.97938144]
|
|
|
|
mean value: 0.983980556594199
|
|
|
|
key: test_roc_auc
|
|
value: [0.74242424 0.77540107 0.76203209 0.76604278 0.8302139 0.69444444
|
|
0.86507937 0.70238095 0.72222222 0.81746032]
|
|
|
|
mean value: 0.7677701383583736
|
|
|
|
key: train_roc_auc
|
|
value: [0.98848954 0.96707075 0.97280607 0.96448007 0.97705869 0.96753882
|
|
0.97445517 0.97527078 0.96319979 0.96437427]
|
|
|
|
mean value: 0.9714743958036675
|
|
|
|
key: test_jcc
|
|
value: [0.64285714 0.64 0.66666667 0.56521739 0.77777778 0.53846154
|
|
0.8 0.63333333 0.56 0.72 ]
|
|
|
|
mean value: 0.6544313850400807
|
|
|
|
key: train_jcc
|
|
value: [0.97948718 0.94527363 0.955 0.94029851 0.95939086 0.94527363
|
|
0.95454545 0.960199 0.94117647 0.94059406]
|
|
|
|
mean value: 0.9521238803090375
|
|
|
|
MCC on Blind test: 0.51
|
|
|
|
Accuracy on Blind test: 0.76
|
|
|
|
Model_name: Bagging Classifier
|
|
Model func: BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
BaggingClassifier(n_jobs=10, oob_score=True,
|
|
random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.0628612 0.08177304 0.0515604 0.07316899 0.06709433 0.06401539
|
|
0.07528853 0.07673025 0.06154585 0.06542706]
|
|
|
|
mean value: 0.06794650554656982
|
|
|
|
key: score_time
|
|
value: [0.02510285 0.02193046 0.01847267 0.03015375 0.01803255 0.02999711
|
|
0.02398348 0.0255239 0.02519321 0.03015709]
|
|
|
|
mean value: 0.024854707717895507
|
|
|
|
key: test_mcc
|
|
value: [0.40201513 0.35561497 0.69498222 0.48807911 0.74203177 0.52048004
|
|
0.69175116 0.48261709 0.71011643 0.54761905]
|
|
|
|
mean value: 0.5635306969360055
|
|
|
|
key: train_mcc
|
|
value: [0.954074 0.96006097 0.95483116 0.94842621 0.97736551 0.96031806
|
|
0.99427116 0.94836906 0.95478908 0.94383671]
|
|
|
|
mean value: 0.9596341899966139
|
|
|
|
key: test_accuracy
|
|
value: [0.7 0.66666667 0.84615385 0.74358974 0.87179487 0.74358974
|
|
0.84615385 0.74358974 0.84615385 0.76923077]
|
|
|
|
mean value: 0.7776923076923077
|
|
|
|
key: train_accuracy
|
|
value: [0.97720798 0.98011364 0.97727273 0.97443182 0.98863636 0.98011364
|
|
0.99715909 0.97443182 0.97727273 0.97159091]
|
|
|
|
mean value: 0.9798230704480705
|
|
|
|
key: test_fscore
|
|
value: [0.71428571 0.66666667 0.85714286 0.76190476 0.88372093 0.72222222
|
|
0.86363636 0.77272727 0.84210526 0.76923077]
|
|
|
|
mean value: 0.7853642821207081
|
|
|
|
key: train_fscore
|
|
value: [0.97916667 0.98172324 0.97894737 0.97662338 0.9895288 0.98172324
|
|
0.99742931 0.97674419 0.97905759 0.97368421]
|
|
|
|
mean value: 0.9814627976826897
|
|
|
|
key: test_precision
|
|
value: [0.75 0.76470588 0.9 0.8 0.9047619 0.86666667
|
|
0.82608696 0.73913043 0.94117647 0.83333333]
|
|
|
|
mean value: 0.8325861649007429
|
|
|
|
key: train_precision
|
|
value: [0.98429319 0.98947368 0.99465241 0.97916667 1. 0.99470899
|
|
0.99487179 0.97927461 0.99468085 0.99462366]
|
|
|
|
mean value: 0.9905745858969144
|
|
|
|
key: test_recall
|
|
value: [0.68181818 0.59090909 0.81818182 0.72727273 0.86363636 0.61904762
|
|
0.9047619 0.80952381 0.76190476 0.71428571]
|
|
|
|
mean value: 0.7491341991341991
|
|
|
|
key: train_recall
|
|
value: [0.97409326 0.97409326 0.96373057 0.97409326 0.97927461 0.96907216
|
|
1. 0.9742268 0.96391753 0.95360825]
|
|
|
|
mean value: 0.9726109716361305
|
|
|
|
key: test_roc_auc
|
|
value: [0.7020202 0.67780749 0.85026738 0.7459893 0.87299465 0.75396825
|
|
0.84126984 0.73809524 0.8531746 0.77380952]
|
|
|
|
mean value: 0.7809396485867074
|
|
|
|
key: train_roc_auc
|
|
value: [0.97755296 0.98075732 0.97872063 0.97446802 0.98963731 0.98137153
|
|
0.99683544 0.97445517 0.97879421 0.97363957]
|
|
|
|
mean value: 0.9806232152982022
|
|
|
|
key: test_jcc
|
|
value: [0.55555556 0.5 0.75 0.61538462 0.79166667 0.56521739
|
|
0.76 0.62962963 0.72727273 0.625 ]
|
|
|
|
mean value: 0.6519726585813542
|
|
|
|
key: train_jcc
|
|
value: [0.95918367 0.96410256 0.95876289 0.95431472 0.97927461 0.96410256
|
|
0.99487179 0.95454545 0.95897436 0.94871795]
|
|
|
|
mean value: 0.9636850577593158
|
|
|
|
MCC on Blind test: 0.52
|
|
|
|
Accuracy on Blind test: 0.76
|
|
|
|
Model_name: Gaussian Process
|
|
Model func: GaussianProcessClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianProcessClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.09267759 0.10598755 0.13946033 0.09499311 0.1663239 0.14514065
|
|
0.1052444 0.09951377 0.11682558 0.12099361]
|
|
|
|
mean value: 0.11871604919433594
|
|
|
|
key: score_time
|
|
value: [0.02805328 0.01415372 0.02762222 0.01744652 0.01424694 0.06440473
|
|
0.01440763 0.02244258 0.02212453 0.02216029]
|
|
|
|
mean value: 0.024706244468688965
|
|
|
|
key: test_mcc
|
|
value: [0.01609348 0.19821695 0.47532708 0.19821695 0.52831916 0.38095238
|
|
0.43643578 0.32713229 0.38786415 0.22340742]
|
|
|
|
mean value: 0.31719656277699165
|
|
|
|
key: train_mcc
|
|
value: [0.98276047 0.97705869 0.98281969 0.98280066 0.97714822 0.99427116
|
|
0.98851625 0.97707007 0.98278047 0.97723742]
|
|
|
|
mean value: 0.9822463092364953
|
|
|
|
key: test_accuracy
|
|
value: [0.525 0.61538462 0.74358974 0.61538462 0.76923077 0.69230769
|
|
0.71794872 0.66666667 0.69230769 0.61538462]
|
|
|
|
mean value: 0.6653205128205129
|
|
|
|
key: train_accuracy
|
|
value: [0.99145299 0.98863636 0.99147727 0.99147727 0.98863636 0.99715909
|
|
0.99431818 0.98863636 0.99147727 0.98863636]
|
|
|
|
mean value: 0.9911907536907537
|
|
|
|
key: test_fscore
|
|
value: [0.6122449 0.69387755 0.79166667 0.69387755 0.8 0.71428571
|
|
0.76595745 0.69767442 0.7 0.65116279]
|
|
|
|
mean value: 0.7120747037063218
|
|
|
|
key: train_fscore
|
|
value: [0.99220779 0.98963731 0.99220779 0.99224806 0.98958333 0.99742931
|
|
0.99484536 0.98974359 0.99228792 0.98979592]
|
|
|
|
mean value: 0.9919986378049969
|
|
|
|
key: test_precision
|
|
value: [0.55555556 0.62962963 0.73076923 0.62962963 0.7826087 0.71428571
|
|
0.69230769 0.68181818 0.73684211 0.63636364]
|
|
|
|
mean value: 0.6789810071274602
|
|
|
|
key: train_precision
|
|
value: [0.99479167 0.98963731 0.99479167 0.98969072 0.9947644 0.99487179
|
|
0.99484536 0.98469388 0.98974359 0.97979798]
|
|
|
|
mean value: 0.9907628361377186
|
|
|
|
key: test_recall
|
|
value: [0.68181818 0.77272727 0.86363636 0.77272727 0.81818182 0.71428571
|
|
0.85714286 0.71428571 0.66666667 0.66666667]
|
|
|
|
mean value: 0.7528138528138528
|
|
|
|
key: train_recall
|
|
value: [0.98963731 0.98963731 0.98963731 0.99481865 0.98445596 1.
|
|
0.99484536 0.99484536 0.99484536 1. ]
|
|
|
|
mean value: 0.9932722610971636
|
|
|
|
key: test_roc_auc
|
|
value: [0.50757576 0.59224599 0.72593583 0.59224599 0.76203209 0.69047619
|
|
0.70634921 0.66269841 0.69444444 0.61111111]
|
|
|
|
mean value: 0.6545115015703251
|
|
|
|
key: train_roc_auc
|
|
value: [0.9916541 0.98852934 0.991674 0.99112002 0.98908333 0.99683544
|
|
0.99425812 0.98792901 0.99109357 0.98734177]
|
|
|
|
mean value: 0.9909518697413212
|
|
|
|
key: test_jcc
|
|
value: [0.44117647 0.53125 0.65517241 0.53125 0.66666667 0.55555556
|
|
0.62068966 0.53571429 0.53846154 0.48275862]
|
|
|
|
mean value: 0.5558695206641454
|
|
|
|
key: train_jcc
|
|
value: [0.98453608 0.97948718 0.98453608 0.98461538 0.97938144 0.99487179
|
|
0.98974359 0.97969543 0.98469388 0.97979798]
|
|
|
|
mean value: 0.9841358845786453
|
|
|
|
MCC on Blind test: 0.26
|
|
|
|
Accuracy on Blind test: 0.64
|
|
|
|
Model_name: Gradient Boosting
|
|
Model func: GradientBoostingClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GradientBoostingClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.65745234 0.59602952 0.60603476 0.63420153 0.61747909 0.58377504
|
|
0.63561296 0.65917778 0.65637898 0.67190957]
|
|
|
|
mean value: 0.631805157661438
|
|
|
|
key: score_time
|
|
value: [0.00936556 0.00926352 0.01064134 0.01090002 0.00955009 0.00982285
|
|
0.0108304 0.01085973 0.01127481 0.01107192]
|
|
|
|
mean value: 0.010358023643493652
|
|
|
|
key: test_mcc
|
|
value: [0.50251891 0.64988795 0.74350254 0.71011643 0.79144385 0.69657235
|
|
0.70106818 0.65465367 0.59366961 0.69047619]
|
|
|
|
mean value: 0.6733909681187302
|
|
|
|
key: train_mcc
|
|
value: [1. 0.99427786 1. 1. 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.999427786446393
|
|
|
|
key: test_accuracy
|
|
value: [0.75 0.82051282 0.87179487 0.84615385 0.8974359 0.84615385
|
|
0.84615385 0.82051282 0.79487179 0.84615385]
|
|
|
|
mean value: 0.833974358974359
|
|
|
|
key: train_accuracy
|
|
value: [1. 0.99715909 1. 1. 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9997159090909091
|
|
|
|
key: test_fscore
|
|
value: [0.76190476 0.82926829 0.89361702 0.85 0.90909091 0.85
|
|
0.86956522 0.85106383 0.8 0.85714286]
|
|
|
|
mean value: 0.847165288927659
|
|
|
|
key: train_fscore
|
|
value: [1. 0.99741602 1. 1. 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9997416020671834
|
|
|
|
key: test_precision
|
|
value: [0.8 0.89473684 0.84 0.94444444 0.90909091 0.89473684
|
|
0.8 0.76923077 0.84210526 0.85714286]
|
|
|
|
mean value: 0.8551487927277401
|
|
|
|
key: train_precision
|
|
value: [1. 0.99484536 1. 1. 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9994845360824742
|
|
|
|
key: test_recall
|
|
value: [0.72727273 0.77272727 0.95454545 0.77272727 0.90909091 0.80952381
|
|
0.95238095 0.95238095 0.76190476 0.85714286]
|
|
|
|
mean value: 0.8469696969696969
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.75252525 0.82754011 0.85962567 0.85695187 0.89572193 0.84920635
|
|
0.83730159 0.80952381 0.79761905 0.8452381 ]
|
|
|
|
mean value: 0.8331253713606654
|
|
|
|
key: train_roc_auc
|
|
value: [1. 0.99685535 1. 1. 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.999685534591195
|
|
|
|
key: test_jcc
|
|
value: [0.61538462 0.70833333 0.80769231 0.73913043 0.83333333 0.73913043
|
|
0.76923077 0.74074074 0.66666667 0.75 ]
|
|
|
|
mean value: 0.7369642635946984
|
|
|
|
key: train_jcc
|
|
value: [1. 0.99484536 1. 1. 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9994845360824742
|
|
|
|
MCC on Blind test: 0.63
|
|
|
|
Accuracy on Blind test: 0.82
|
|
|
|
Model_name: QDA
|
|
Model func: QuadraticDiscriminantAnalysis()
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', QuadraticDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.12581205 0.08368993 0.05365229 0.05670118 0.05033875 0.04044056
|
|
0.03177214 0.08045316 0.04733801 0.04449892]
|
|
|
|
mean value: 0.06146969795227051
|
|
|
|
key: score_time
|
|
value: [0.02203012 0.0189116 0.02993417 0.03207183 0.02699876 0.01452136
|
|
0.02443647 0.01289511 0.02427244 0.02420735]
|
|
|
|
mean value: 0.023027920722961427
|
|
|
|
key: test_mcc
|
|
value: [0.05025189 0.13434332 0.32178399 0.00944069 0.53206037 0.44840472
|
|
0.31180478 0.31180478 0.3474523 0.10329663]
|
|
|
|
mean value: 0.2570643476636058
|
|
|
|
key: train_mcc
|
|
value: [0.47126888 0.44829265 0.44829265 0.44829265 0.42181028 0.44030757
|
|
0.44030757 0.44560397 0.44560397 0.44030757]
|
|
|
|
mean value: 0.4450087760196973
|
|
|
|
key: test_accuracy
|
|
value: [0.55 0.58974359 0.66666667 0.53846154 0.74358974 0.71794872
|
|
0.61538462 0.61538462 0.66666667 0.56410256]
|
|
|
|
mean value: 0.6267948717948718
|
|
|
|
key: train_accuracy
|
|
value: [0.7037037 0.69034091 0.69034091 0.69034091 0.67613636 0.6875
|
|
0.6875 0.69034091 0.69034091 0.6875 ]
|
|
|
|
mean value: 0.6894044612794613
|
|
|
|
key: test_fscore
|
|
value: [0.66666667 0.72413793 0.75471698 0.65384615 0.81481481 0.7755102
|
|
0.73684211 0.73684211 0.74509804 0.66666667]
|
|
|
|
mean value: 0.7275141667984495
|
|
|
|
key: train_fscore
|
|
value: [0.7877551 0.77979798 0.77979798 0.77979798 0.772 0.77911647
|
|
0.77911647 0.7806841 0.7806841 0.77911647]
|
|
|
|
mean value: 0.779786664828065
|
|
|
|
key: test_precision
|
|
value: [0.5625 0.58333333 0.64516129 0.56666667 0.6875 0.67857143
|
|
0.58333333 0.58333333 0.63333333 0.56666667]
|
|
|
|
mean value: 0.6090399385560676
|
|
|
|
key: train_precision
|
|
value: [0.64983165 0.63907285 0.63907285 0.63907285 0.6286645 0.63815789
|
|
0.63815789 0.64026403 0.64026403 0.63815789]
|
|
|
|
mean value: 0.6390716425007821
|
|
|
|
key: test_recall
|
|
value: [0.81818182 0.95454545 0.90909091 0.77272727 1. 0.9047619
|
|
1. 1. 0.9047619 0.80952381]
|
|
|
|
mean value: 0.9073593073593074
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.52020202 0.53609626 0.63101604 0.5040107 0.70588235 0.70238095
|
|
0.58333333 0.58333333 0.6468254 0.54365079]
|
|
|
|
mean value: 0.5956731177319412
|
|
|
|
key: train_roc_auc
|
|
value: [0.67088608 0.6572327 0.6572327 0.6572327 0.64150943 0.65189873
|
|
0.65189873 0.65506329 0.65506329 0.65189873]
|
|
|
|
mean value: 0.6549916407929305
|
|
|
|
key: test_jcc
|
|
value: [0.5 0.56756757 0.60606061 0.48571429 0.6875 0.63333333
|
|
0.58333333 0.58333333 0.59375 0.5 ]
|
|
|
|
mean value: 0.574059245934246
|
|
|
|
key: train_jcc
|
|
value: [0.64983165 0.63907285 0.63907285 0.63907285 0.6286645 0.63815789
|
|
0.63815789 0.64026403 0.64026403 0.63815789]
|
|
|
|
mean value: 0.6390716425007821
|
|
|
|
MCC on Blind test: 0.12
|
|
|
|
Accuracy on Blind test: 0.58
|
|
|
|
Model_name: Ridge Classifier
|
|
Model func: RidgeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.04989004 0.03763151 0.04120517 0.05915475 0.04827642 0.03862739
|
|
0.04097748 0.04380655 0.0387454 0.04259706]
|
|
|
|
mean value: 0.04409117698669433
|
|
|
|
key: score_time
|
|
value: [0.0314424 0.02498007 0.01469374 0.02595615 0.0258348 0.02157927
|
|
0.04579115 0.03543663 0.02513885 0.03504443]
|
|
|
|
mean value: 0.028589749336242677
|
|
|
|
key: test_mcc
|
|
value: [0.4411494 0.21294497 0.32713229 0.63344389 0.63344389 0.3539192
|
|
0.4866238 0.59160798 0.49076688 0.65079365]
|
|
|
|
mean value: 0.4821825943663392
|
|
|
|
key: train_mcc
|
|
value: [0.77522299 0.73593017 0.76499433 0.71848548 0.74731628 0.75280405
|
|
0.76436443 0.76997315 0.74696056 0.74696087]
|
|
|
|
mean value: 0.7523012309263739
|
|
|
|
key: test_accuracy
|
|
value: [0.725 0.61538462 0.66666667 0.82051282 0.82051282 0.66666667
|
|
0.74358974 0.76923077 0.74358974 0.82051282]
|
|
|
|
mean value: 0.7391666666666666
|
|
|
|
key: train_accuracy
|
|
value: [0.88888889 0.86931818 0.88352273 0.86079545 0.875 0.87784091
|
|
0.88352273 0.88636364 0.875 0.875 ]
|
|
|
|
mean value: 0.8775252525252525
|
|
|
|
key: test_fscore
|
|
value: [0.76595745 0.66666667 0.69767442 0.84444444 0.84444444 0.64864865
|
|
0.7826087 0.82352941 0.75 0.82051282]
|
|
|
|
mean value: 0.7644486997547066
|
|
|
|
key: train_fscore
|
|
value: [0.90025575 0.88383838 0.89350649 0.87468031 0.88832487 0.89168766
|
|
0.89672544 0.89847716 0.8877551 0.88888889]
|
|
|
|
mean value: 0.8904140058349286
|
|
|
|
key: test_precision
|
|
value: [0.72 0.65217391 0.71428571 0.82608696 0.82608696 0.75
|
|
0.72 0.7 0.78947368 0.88888889]
|
|
|
|
mean value: 0.7586996113472086
|
|
|
|
key: train_precision
|
|
value: [0.88888889 0.86206897 0.89583333 0.86363636 0.87064677 0.87192118
|
|
0.87684729 0.885 0.87878788 0.87128713]
|
|
|
|
mean value: 0.8764917797952135
|
|
|
|
key: test_recall
|
|
value: [0.81818182 0.68181818 0.68181818 0.86363636 0.86363636 0.57142857
|
|
0.85714286 1. 0.71428571 0.76190476]
|
|
|
|
mean value: 0.7813852813852814
|
|
|
|
key: train_recall
|
|
value: [0.9119171 0.90673575 0.89119171 0.88601036 0.90673575 0.91237113
|
|
0.91752577 0.91237113 0.89690722 0.90721649]
|
|
|
|
mean value: 0.9048982426152449
|
|
|
|
key: test_roc_auc
|
|
value: [0.71464646 0.60561497 0.6644385 0.81417112 0.81417112 0.67460317
|
|
0.73412698 0.75 0.74603175 0.82539683]
|
|
|
|
mean value: 0.7343200916730328
|
|
|
|
key: train_roc_auc
|
|
value: [0.8863383 0.86531756 0.88270277 0.85809952 0.87160687 0.87390709
|
|
0.87964896 0.88340076 0.87250424 0.87132977]
|
|
|
|
mean value: 0.8744855833727446
|
|
|
|
key: test_jcc
|
|
value: [0.62068966 0.5 0.53571429 0.73076923 0.73076923 0.48
|
|
0.64285714 0.7 0.6 0.69565217]
|
|
|
|
mean value: 0.6236451719195347
|
|
|
|
key: train_jcc
|
|
value: [0.81860465 0.7918552 0.80751174 0.77727273 0.79908676 0.80454545
|
|
0.81278539 0.8156682 0.79816514 0.8 ]
|
|
|
|
mean value: 0.8025495260188461
|
|
|
|
MCC on Blind test: 0.4
|
|
|
|
Accuracy on Blind test: 0.7
|
|
|
|
Model_name: Ridge ClassifierCV
|
|
Model func: RidgeClassifierCV(cv=10)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifierCV(cv=10))])
|
|
|
|
key: fit_time
|
|
value: [0.4385066 0.42086315 0.38916779 0.36381245 0.39142966 0.44574833
|
|
0.50866032 0.32456923 0.28603816 0.34201574]
|
|
|
|
mean value: 0.3910811424255371
|
|
|
|
key: score_time
|
|
value: [0.02288485 0.04180908 0.01133728 0.02785611 0.03040051 0.02803445
|
|
0.01187062 0.07963872 0.01995134 0.02377748]
|
|
|
|
mean value: 0.029756045341491698
|
|
|
|
key: test_mcc
|
|
value: [0.34054054 0.36829757 0.32713229 0.74203177 0.58501794 0.28496141
|
|
0.54554473 0.46953014 0.58730159 0.65079365]
|
|
|
|
mean value: 0.49011516351553097
|
|
|
|
key: train_mcc
|
|
value: [0.69433091 0.67816125 0.76499433 0.65512331 0.68964641 0.70125462
|
|
0.67188139 0.68345012 0.71827166 0.74696087]
|
|
|
|
mean value: 0.7004074872029109
|
|
|
|
key: test_accuracy
|
|
value: [0.675 0.69230769 0.66666667 0.87179487 0.79487179 0.64102564
|
|
0.76923077 0.71794872 0.79487179 0.82051282]
|
|
|
|
mean value: 0.744423076923077
|
|
|
|
key: train_accuracy
|
|
value: [0.84900285 0.84090909 0.88352273 0.82954545 0.84659091 0.85227273
|
|
0.83806818 0.84375 0.86079545 0.875 ]
|
|
|
|
mean value: 0.8519457394457395
|
|
|
|
key: test_fscore
|
|
value: [0.71111111 0.76 0.69767442 0.88372093 0.83333333 0.65
|
|
0.80851064 0.78431373 0.80952381 0.82051282]
|
|
|
|
mean value: 0.7758700787106352
|
|
|
|
key: train_fscore
|
|
value: /home/tanu/git/LSHTM_analysis/scripts/ml/./pnca_cd_sl.py:115: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
baseline_CT.sort_values(by = ['test_mcc'], ascending = False, inplace = True)
|
|
/home/tanu/git/LSHTM_analysis/scripts/ml/./pnca_cd_sl.py:118: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
baseline_BT.sort_values(by = ['bts_mcc'], ascending = False, inplace = True)
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[0.86582278 0.85858586 0.89350649 0.84693878 0.86294416 0.87064677
|
|
0.85642317 0.86075949 0.87719298 0.88888889]
|
|
|
|
mean value: 0.8681709379837828
|
|
|
|
key: test_precision
|
|
value: [0.69565217 0.67857143 0.71428571 0.9047619 0.76923077 0.68421053
|
|
0.73076923 0.66666667 0.80952381 0.88888889]
|
|
|
|
mean value: 0.7542561112927245
|
|
|
|
key: train_precision
|
|
value: [0.84653465 0.83743842 0.89583333 0.83417085 0.84577114 0.84134615
|
|
0.83743842 0.84577114 0.85365854 0.87128713]
|
|
|
|
mean value: 0.8509249796062281
|
|
|
|
key: test_recall
|
|
value: [0.72727273 0.86363636 0.68181818 0.86363636 0.90909091 0.61904762
|
|
0.9047619 0.95238095 0.80952381 0.76190476]
|
|
|
|
mean value: 0.8093073593073593
|
|
|
|
key: train_recall
|
|
value: [0.88601036 0.88082902 0.89119171 0.86010363 0.88082902 0.90206186
|
|
0.87628866 0.87628866 0.90206186 0.90721649]
|
|
|
|
mean value: 0.8862881256343144
|
|
|
|
key: test_roc_auc
|
|
value: [0.66919192 0.6671123 0.6644385 0.87299465 0.77807487 0.64285714
|
|
0.75793651 0.6984127 0.79365079 0.82539683]
|
|
|
|
mean value: 0.7370066208301502
|
|
|
|
key: train_roc_auc
|
|
value: [0.84490392 0.83664092 0.88270277 0.82627823 0.84293023 0.84660055
|
|
0.83371395 0.84004306 0.85609422 0.87132977]
|
|
|
|
mean value: 0.8481237618857027
|
|
|
|
key: test_jcc
|
|
value: [0.55172414 0.61290323 0.53571429 0.79166667 0.71428571 0.48148148
|
|
0.67857143 0.64516129 0.68 0.69565217]
|
|
|
|
mean value: 0.6387160404692687
|
|
|
|
key: train_jcc
|
|
value: [0.76339286 0.75221239 0.80751174 0.73451327 0.75892857 0.77092511
|
|
0.74889868 0.75555556 0.78125 0.8 ]
|
|
|
|
mean value: 0.7673188173479255
|
|
|
|
MCC on Blind test: 0.45
|
|
|
|
Accuracy on Blind test: 0.73
|
|
|
|
Model_name: Logistic Regression
|
|
Model func: LogisticRegression(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegression(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.04330349 0.08664036 0.11152577 0.2121923 0.12195444 0.11751413
|
|
0.06918955 0.14634132 0.18964505 0.07947135]
|
|
|
|
mean value: 0.11777777671813965
|
|
|
|
key: score_time
|
|
value: [0.01222849 0.01216006 0.0142653 0.02318764 0.01702595 0.01708937
|
|
0.01801968 0.01644063 0.0302918 0.02245378]
|
|
|
|
mean value: 0.018316268920898438
|
|
|
|
key: test_mcc
|
|
value: [0.53796222 0.30666041 0.58824786 0.72451364 0.62770563 0.25490741
|
|
0.81701092 0.61748053 0.2270149 0.63732414]
|
|
|
|
mean value: 0.5338827639400422
|
|
|
|
key: train_mcc
|
|
value: [0.71583503 0.73666076 0.72106756 0.69510176 0.72092837 0.75196987
|
|
0.67442109 0.66929302 0.73643866 0.72106756]
|
|
|
|
mean value: 0.7142783690459764
|
|
|
|
key: test_accuracy
|
|
value: [0.76744186 0.65116279 0.79069767 0.86046512 0.81395349 0.62790698
|
|
0.90697674 0.79069767 0.60465116 0.81395349]
|
|
|
|
mean value: 0.7627906976744185
|
|
|
|
key: train_accuracy
|
|
value: [0.85788114 0.86821705 0.86046512 0.84754522 0.86046512 0.87596899
|
|
0.8372093 0.83462532 0.86821705 0.86046512]
|
|
|
|
mean value: 0.8571059431524548
|
|
|
|
key: test_fscore
|
|
value: [0.76190476 0.69387755 0.7804878 0.85714286 0.81818182 0.6
|
|
0.9 0.81632653 0.65306122 0.78947368]
|
|
|
|
mean value: 0.7670456232440461
|
|
|
|
key: train_fscore
|
|
value: [0.85639687 0.86614173 0.85863874 0.84754522 0.86010363 0.87692308
|
|
0.83804627 0.83419689 0.8688946 0.8622449 ]
|
|
|
|
mean value: 0.8569131929270901
|
|
|
|
key: test_precision
|
|
value: [0.8 0.62962963 0.84210526 0.9 0.81818182 0.63157895
|
|
0.94736842 0.71428571 0.57142857 0.88235294]
|
|
|
|
mean value: 0.7736931306281152
|
|
|
|
key: train_precision
|
|
value: [0.86315789 0.87765957 0.86772487 0.84536082 0.86010363 0.87244898
|
|
0.83589744 0.83854167 0.86666667 0.85353535]
|
|
|
|
mean value: 0.8581096890973028
|
|
|
|
key: test_recall
|
|
value: [0.72727273 0.77272727 0.72727273 0.81818182 0.81818182 0.57142857
|
|
0.85714286 0.95238095 0.76190476 0.71428571]
|
|
|
|
mean value: 0.772077922077922
|
|
|
|
key: train_recall
|
|
value: [0.84974093 0.85492228 0.84974093 0.84974093 0.86010363 0.8814433
|
|
0.84020619 0.82989691 0.87113402 0.87113402]
|
|
|
|
mean value: 0.8558063137652904
|
|
|
|
key: test_roc_auc
|
|
value: [0.76839827 0.6482684 0.79220779 0.86147186 0.81385281 0.62662338
|
|
0.90584416 0.79437229 0.60822511 0.81168831]
|
|
|
|
mean value: 0.7630952380952382
|
|
|
|
key: train_roc_auc
|
|
value: [0.85786016 0.86818279 0.86043748 0.84755088 0.86046418 0.87595481
|
|
0.83720154 0.83463757 0.8682095 0.86043748]
|
|
|
|
mean value: 0.8570936381603547
|
|
|
|
key: test_jcc
|
|
value: [0.61538462 0.53125 0.64 0.75 0.69230769 0.42857143
|
|
0.81818182 0.68965517 0.48484848 0.65217391]
|
|
|
|
mean value: 0.630237312475131
|
|
|
|
key: train_jcc
|
|
value: [0.74885845 0.76388889 0.75229358 0.73542601 0.75454545 0.78082192
|
|
0.72123894 0.71555556 0.76818182 0.75784753]
|
|
|
|
mean value: 0.7498658141104166
|
|
|
|
MCC on Blind test: 0.39
|
|
|
|
Accuracy on Blind test: 0.7
|
|
|
|
Model_name: Logistic RegressionCV
|
|
Model func: LogisticRegressionCV(random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegressionCV(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [3.27406001 2.85859323 2.05400014 3.12286806 2.90991402 2.67471933
|
|
3.62207961 3.69394207 2.33394408 1.94188452]
|
|
|
|
mean value: 2.8486005067825317
|
|
|
|
key: score_time
|
|
value: [0.01242566 0.01069951 0.01267076 0.01221442 0.02676773 0.01851606
|
|
0.02073812 0.02033973 0.02364993 0.01231551]
|
|
|
|
mean value: 0.0170337438583374
|
|
|
|
key: test_mcc
|
|
value: [0.53796222 0.35185603 0.50454827 0.62770563 0.58225108 0.25490741
|
|
0.86117339 0.55959928 0.16887427 0.62964308]
|
|
|
|
mean value: 0.5078520662310667
|
|
|
|
key: train_mcc
|
|
value: [0.78298032 0.84496021 0.82447378 0.64869633 0.7726435 0.80361626
|
|
0.63824048 0.63824048 0.67442978 0.59764154]
|
|
|
|
mean value: 0.7225922669452046
|
|
|
|
key: test_accuracy
|
|
value: [0.76744186 0.6744186 0.74418605 0.81395349 0.79069767 0.62790698
|
|
0.93023256 0.76744186 0.58139535 0.81395349]
|
|
|
|
mean value: 0.7511627906976744
|
|
|
|
key: train_accuracy
|
|
value: [0.89147287 0.92248062 0.9121447 0.82428941 0.88630491 0.90180879
|
|
0.81912145 0.81912145 0.8372093 0.79844961]
|
|
|
|
mean value: 0.8612403100775194
|
|
|
|
key: test_fscore
|
|
value: [0.76190476 0.70833333 0.71794872 0.81818182 0.79069767 0.6
|
|
0.92682927 0.79166667 0.60869565 0.8 ]
|
|
|
|
mean value: 0.7524257892920498
|
|
|
|
key: train_fscore
|
|
value: [0.890625 0.92227979 0.91282051 0.82198953 0.88541667 0.90206186
|
|
0.81958763 0.81958763 0.8372093 0.8040201 ]
|
|
|
|
mean value: 0.861559801725926
|
|
|
|
key: test_precision
|
|
value: [0.8 0.65384615 0.82352941 0.81818182 0.80952381 0.63157895
|
|
0.95 0.7037037 0.56 0.84210526]
|
|
|
|
mean value: 0.7592469107546507
|
|
|
|
key: train_precision
|
|
value: [0.89528796 0.92227979 0.9035533 0.83068783 0.89005236 0.90206186
|
|
0.81958763 0.81958763 0.83937824 0.78431373]
|
|
|
|
mean value: 0.8606790314296683
|
|
|
|
key: test_recall
|
|
value: [0.72727273 0.77272727 0.63636364 0.81818182 0.77272727 0.57142857
|
|
0.9047619 0.9047619 0.66666667 0.76190476]
|
|
|
|
mean value: 0.7536796536796537
|
|
|
|
key: train_recall
|
|
value: [0.88601036 0.92227979 0.92227979 0.8134715 0.88082902 0.90206186
|
|
0.81958763 0.81958763 0.83505155 0.82474227]
|
|
|
|
mean value: 0.8625901394156295
|
|
|
|
key: test_roc_auc
|
|
value: [0.76839827 0.67207792 0.74675325 0.81385281 0.79112554 0.62662338
|
|
0.92965368 0.77056277 0.58333333 0.81277056]
|
|
|
|
mean value: 0.7515151515151515
|
|
|
|
key: train_roc_auc
|
|
value: [0.89145879 0.9224801 0.91217082 0.82426152 0.8862908 0.90180813
|
|
0.81912024 0.81912024 0.83721489 0.7983815 ]
|
|
|
|
mean value: 0.8612307034880615
|
|
|
|
key: test_jcc
|
|
value: [0.61538462 0.5483871 0.56 0.69230769 0.65384615 0.42857143
|
|
0.86363636 0.65517241 0.4375 0.66666667]
|
|
|
|
mean value: 0.6121472430980217
|
|
|
|
key: train_jcc
|
|
value: [0.8028169 0.85576923 0.83962264 0.69777778 0.79439252 0.82159624
|
|
0.69432314 0.69432314 0.72 0.67226891]
|
|
|
|
mean value: 0.7592890514733467
|
|
|
|
MCC on Blind test: 0.45
|
|
|
|
Accuracy on Blind test: 0.73
|
|
|
|
Model_name: Gaussian NB
|
|
Model func: GaussianNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianNB())])
|
|
|
|
key: fit_time
|
|
value: [0.0136497 0.01171947 0.01167536 0.01198244 0.01169252 0.01167893
|
|
0.01165748 0.01183629 0.01165485 0.01159739]
|
|
|
|
mean value: 0.011914443969726563
|
|
|
|
key: score_time
|
|
value: [0.01215506 0.01054978 0.01036692 0.01034522 0.01050973 0.01032734
|
|
0.01033878 0.01041269 0.01039958 0.01040697]
|
|
|
|
mean value: 0.010581207275390626
|
|
|
|
key: test_mcc
|
|
value: [ 0.4912706 -0.08925021 0.44227524 0.2567 0.41330345 0.21351219
|
|
0.57282196 0.31757311 0.44054301 0.16485939]
|
|
|
|
mean value: 0.322360873390043
|
|
|
|
key: train_mcc
|
|
value: [0.3592453 0.43986881 0.39058415 0.373932 0.37829733 0.42490536
|
|
0.39988821 0.38683093 0.40827403 0.3976743 ]
|
|
|
|
mean value: 0.3959500432696024
|
|
|
|
key: test_accuracy
|
|
value: [0.74418605 0.46511628 0.72093023 0.62790698 0.69767442 0.60465116
|
|
0.74418605 0.62790698 0.69767442 0.58139535]
|
|
|
|
mean value: 0.6511627906976745
|
|
|
|
key: train_accuracy
|
|
value: [0.66925065 0.71317829 0.68475452 0.66666667 0.67958656 0.70542636
|
|
0.69250646 0.68475452 0.69767442 0.68992248]
|
|
|
|
mean value: 0.6883720930232559
|
|
|
|
key: test_fscore
|
|
value: [0.76595745 0.56603774 0.73913043 0.66666667 0.74509804 0.62222222
|
|
0.79245283 0.7037037 0.74509804 0.59090909]
|
|
|
|
mean value: 0.6937276209561912
|
|
|
|
key: train_fscore
|
|
value: [0.71555556 0.74364896 0.72767857 0.72727273 0.72197309 0.73972603
|
|
0.73015873 0.7264574 0.73226545 0.7309417 ]
|
|
|
|
mean value: 0.7295678216085548
|
|
|
|
key: test_precision
|
|
value: [0.72 0.48387097 0.70833333 0.61538462 0.65517241 0.58333333
|
|
0.65625 0.57575758 0.63333333 0.56521739]
|
|
|
|
mean value: 0.6196652963981578
|
|
|
|
key: train_precision
|
|
value: [0.62645914 0.67083333 0.63921569 0.61428571 0.63636364 0.66393443
|
|
0.65182186 0.64285714 0.65843621 0.6468254 ]
|
|
|
|
mean value: 0.6451032556478061
|
|
|
|
key: test_recall
|
|
value: [0.81818182 0.68181818 0.77272727 0.72727273 0.86363636 0.66666667
|
|
1. 0.9047619 0.9047619 0.61904762]
|
|
|
|
mean value: 0.7958874458874459
|
|
|
|
key: train_recall
|
|
value: [0.83419689 0.83419689 0.84455959 0.89119171 0.83419689 0.83505155
|
|
0.82989691 0.83505155 0.82474227 0.84020619]
|
|
|
|
mean value: 0.8403290422520164
|
|
|
|
key: test_roc_auc
|
|
value: [0.74242424 0.45995671 0.71969697 0.62554113 0.69372294 0.60606061
|
|
0.75 0.63419913 0.70238095 0.58225108]
|
|
|
|
mean value: 0.6516233766233767
|
|
|
|
key: train_roc_auc
|
|
value: [0.66967577 0.7134902 0.68516639 0.66724534 0.67998504 0.70509054
|
|
0.69215053 0.68436515 0.69734523 0.68953314]
|
|
|
|
mean value: 0.6884047326531703
|
|
|
|
key: test_jcc
|
|
value: [0.62068966 0.39473684 0.5862069 0.5 0.59375 0.4516129
|
|
0.65625 0.54285714 0.59375 0.41935484]
|
|
|
|
mean value: 0.5359208278622027
|
|
|
|
key: train_jcc
|
|
value: [0.55709343 0.59191176 0.57192982 0.57142857 0.56491228 0.58695652
|
|
0.575 0.57042254 0.57761733 0.57597173]
|
|
|
|
mean value: 0.5743243983922165
|
|
|
|
MCC on Blind test: 0.26
|
|
|
|
Accuracy on Blind test: 0.64
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01208615 0.0118084 0.0119195 0.01202059 0.01185799 0.01190257
|
|
0.01189137 0.01201367 0.01191545 0.01200342]
|
|
|
|
mean value: 0.011941909790039062
|
|
|
|
key: score_time
|
|
value: [0.01037812 0.01057816 0.01038575 0.01036143 0.01032662 0.01039433
|
|
0.01032615 0.01039243 0.01032019 0.0105021 ]
|
|
|
|
mean value: 0.010396528244018554
|
|
|
|
key: test_mcc
|
|
value: [0.4517935 0.02169203 0.40939224 0.4517935 0.35185603 0.11496773
|
|
0.81385281 0.44468651 0.2270149 0.30265778]
|
|
|
|
mean value: 0.3589707032809893
|
|
|
|
key: train_mcc
|
|
value: [0.41603013 0.4677316 0.4367638 0.44185674 0.44703809 0.44185674
|
|
0.43671867 0.44252245 0.43195073 0.45219272]
|
|
|
|
mean value: 0.44146616665156013
|
|
|
|
key: test_accuracy
|
|
value: [0.72093023 0.51162791 0.69767442 0.72093023 0.6744186 0.55813953
|
|
0.90697674 0.72093023 0.60465116 0.65116279]
|
|
|
|
mean value: 0.6767441860465117
|
|
|
|
key: train_accuracy
|
|
value: [0.70801034 0.73385013 0.71834625 0.72093023 0.72351421 0.72093023
|
|
0.71834625 0.72093023 0.71576227 0.72609819]
|
|
|
|
mean value: 0.720671834625323
|
|
|
|
key: test_fscore
|
|
value: [0.7 0.53333333 0.66666667 0.7 0.70833333 0.53658537
|
|
0.9047619 0.72727273 0.65306122 0.61538462]
|
|
|
|
mean value: 0.6745399171096035
|
|
|
|
key: train_fscore
|
|
value: [0.70801034 0.7310705 0.71979434 0.72020725 0.72351421 0.72164948
|
|
0.72122762 0.71428571 0.71052632 0.72680412]
|
|
|
|
mean value: 0.7197089902052173
|
|
|
|
key: test_precision
|
|
value: [0.77777778 0.52173913 0.76470588 0.77777778 0.65384615 0.55
|
|
0.9047619 0.69565217 0.57142857 0.66666667]
|
|
|
|
mean value: 0.6884356038959619
|
|
|
|
key: train_precision
|
|
value: [0.70618557 0.73684211 0.71428571 0.72020725 0.72164948 0.72164948
|
|
0.71573604 0.73369565 0.72580645 0.72680412]
|
|
|
|
mean value: 0.722286187762465
|
|
|
|
key: test_recall
|
|
value: [0.63636364 0.54545455 0.59090909 0.63636364 0.77272727 0.52380952
|
|
0.9047619 0.76190476 0.76190476 0.57142857]
|
|
|
|
mean value: 0.6705627705627706
|
|
|
|
key: train_recall
|
|
value: [0.70984456 0.7253886 0.7253886 0.72020725 0.7253886 0.72164948
|
|
0.72680412 0.69587629 0.69587629 0.72680412]
|
|
|
|
mean value: 0.7173227925858662
|
|
|
|
key: test_roc_auc
|
|
value: [0.72294372 0.51082251 0.70021645 0.72294372 0.67207792 0.55735931
|
|
0.90692641 0.72186147 0.60822511 0.64935065]
|
|
|
|
mean value: 0.6772727272727272
|
|
|
|
key: train_roc_auc
|
|
value: [0.70801506 0.73382832 0.7183644 0.72092837 0.72351904 0.72092837
|
|
0.71832434 0.72099514 0.71581379 0.72609636]
|
|
|
|
mean value: 0.7206813204422841
|
|
|
|
key: test_jcc
|
|
value: [0.53846154 0.36363636 0.5 0.53846154 0.5483871 0.36666667
|
|
0.82608696 0.57142857 0.48484848 0.44444444]
|
|
|
|
mean value: 0.518242166124354
|
|
|
|
key: train_jcc
|
|
value: [0.548 0.57613169 0.562249 0.56275304 0.56680162 0.56451613
|
|
0.564 0.55555556 0.55102041 0.5708502 ]
|
|
|
|
mean value: 0.5621877634277408
|
|
|
|
MCC on Blind test: 0.32
|
|
|
|
Accuracy on Blind test: 0.67
|
|
|
|
Model_name: K-Nearest Neighbors
|
|
Model func: KNeighborsClassifier()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', KNeighborsClassifier())])
|
|
|
|
key: fit_time
|
|
value: [0.01142979 0.0113678 0.01118851 0.01128006 0.01139712 0.01154923
|
|
0.01103115 0.01123428 0.0113554 0.01163101]
|
|
|
|
mean value: 0.011346435546875
|
|
|
|
key: score_time
|
|
value: [0.02301836 0.0221324 0.02117586 0.02700591 0.01861501 0.02525496
|
|
0.02225375 0.02616882 0.02647042 0.02867126]
|
|
|
|
mean value: 0.024076676368713378
|
|
|
|
key: test_mcc
|
|
value: [ 0.25541126 -0.02614435 0.12939849 0.44468651 0.4517935 -0.11404496
|
|
0.16233766 0.34848485 0.26318068 0.30265778]
|
|
|
|
mean value: 0.22177614182156274
|
|
|
|
key: train_mcc
|
|
value: [0.55559413 0.56087856 0.53050168 0.54551531 0.54025969 0.61240352
|
|
0.5507031 0.55039795 0.55564759 0.51951804]
|
|
|
|
mean value: 0.5521419571533892
|
|
|
|
key: test_accuracy
|
|
value: [0.62790698 0.48837209 0.55813953 0.72093023 0.72093023 0.44186047
|
|
0.58139535 0.6744186 0.62790698 0.65116279]
|
|
|
|
mean value: 0.6093023255813953
|
|
|
|
key: train_accuracy
|
|
value: [0.77777778 0.78036176 0.76485788 0.77260982 0.77002584 0.80620155
|
|
0.7751938 0.7751938 0.77777778 0.75968992]
|
|
|
|
mean value: 0.775968992248062
|
|
|
|
key: test_fscore
|
|
value: [0.63636364 0.52173913 0.48648649 0.71428571 0.7 0.47826087
|
|
0.57142857 0.66666667 0.65217391 0.61538462]
|
|
|
|
mean value: 0.6042789603659169
|
|
|
|
key: train_fscore
|
|
value: [0.77835052 0.77690289 0.75733333 0.7755102 0.77237852 0.80719794
|
|
0.77974684 0.7751938 0.78061224 0.76335878]
|
|
|
|
mean value: 0.7766585057503326
|
|
|
|
key: test_precision
|
|
value: [0.63636364 0.5 0.6 0.75 0.77777778 0.44
|
|
0.57142857 0.66666667 0.6 0.66666667]
|
|
|
|
mean value: 0.6208903318903318
|
|
|
|
key: train_precision
|
|
value: [0.77435897 0.78723404 0.78021978 0.7638191 0.76262626 0.80512821
|
|
0.76616915 0.77720207 0.77272727 0.75376884]
|
|
|
|
mean value: 0.7743253704079895
|
|
|
|
key: test_recall
|
|
value: [0.63636364 0.54545455 0.40909091 0.68181818 0.63636364 0.52380952
|
|
0.57142857 0.66666667 0.71428571 0.57142857]
|
|
|
|
mean value: 0.5956709956709957
|
|
|
|
key: train_recall
|
|
value: [0.78238342 0.76683938 0.7357513 0.78756477 0.78238342 0.80927835
|
|
0.79381443 0.77319588 0.78865979 0.77319588]
|
|
|
|
mean value: 0.7793066609689653
|
|
|
|
key: test_roc_auc
|
|
value: [0.62770563 0.48701299 0.56168831 0.72186147 0.72294372 0.44372294
|
|
0.58116883 0.67424242 0.62987013 0.64935065]
|
|
|
|
mean value: 0.6099567099567099
|
|
|
|
key: train_roc_auc
|
|
value: [0.77778965 0.78032691 0.76478286 0.77264836 0.77005769 0.80619358
|
|
0.77514556 0.77519897 0.77774959 0.75965493]
|
|
|
|
mean value: 0.7759548101062977
|
|
|
|
key: test_jcc
|
|
value: [0.46666667 0.35294118 0.32142857 0.55555556 0.53846154 0.31428571
|
|
0.4 0.5 0.48387097 0.44444444]
|
|
|
|
mean value: 0.43776546350550144
|
|
|
|
key: train_jcc
|
|
value: [0.6371308 0.63519313 0.60944206 0.63333333 0.62916667 0.67672414
|
|
0.63900415 0.63291139 0.64016736 0.61728395]
|
|
|
|
mean value: 0.6350356989168522
|
|
|
|
MCC on Blind test: 0.2
|
|
|
|
Accuracy on Blind test: 0.61
|
|
|
|
Model_name: SVM
|
|
Model func: SVC(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SVC(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02410388 0.0238421 0.02404499 0.02403641 0.02398729 0.02347827
|
|
0.02385569 0.02381396 0.02365422 0.02424955]
|
|
|
|
mean value: 0.023906636238098144
|
|
|
|
key: score_time
|
|
value: [0.01353765 0.01356936 0.01376414 0.0136354 0.01343894 0.01350808
|
|
0.01371026 0.01338792 0.01352549 0.01347947]
|
|
|
|
mean value: 0.013555669784545898
|
|
|
|
key: test_mcc
|
|
value: [0.4517935 0.34859132 0.55959928 0.73471273 0.62770563 0.25490741
|
|
0.67462198 0.53796222 0.21351219 0.59541363]
|
|
|
|
mean value: 0.49988198788408944
|
|
|
|
key: train_mcc
|
|
value: [0.7371987 0.71733232 0.706524 0.6858155 0.69022752 0.71625569
|
|
0.69518417 0.6908848 0.73191874 0.72659748]
|
|
|
|
mean value: 0.7097938919329834
|
|
|
|
key: test_accuracy
|
|
value: [0.72093023 0.6744186 0.76744186 0.86046512 0.81395349 0.62790698
|
|
0.8372093 0.76744186 0.60465116 0.79069767]
|
|
|
|
mean value: 0.7465116279069768
|
|
|
|
key: train_accuracy
|
|
value: [0.86821705 0.85788114 0.85271318 0.84237726 0.84496124 0.85788114
|
|
0.84754522 0.84496124 0.86563307 0.8630491 ]
|
|
|
|
mean value: 0.8545219638242894
|
|
|
|
key: test_fscore
|
|
value: [0.7 0.69565217 0.73684211 0.85 0.81818182 0.6
|
|
0.82926829 0.77272727 0.62222222 0.75675676]
|
|
|
|
mean value: 0.7381650641747198
|
|
|
|
key: train_fscore
|
|
value: [0.86472149 0.85254692 0.848 0.83733333 0.84210526 0.85564304
|
|
0.84675325 0.84126984 0.86315789 0.86089239]
|
|
|
|
mean value: 0.8512423414623245
|
|
|
|
key: test_precision
|
|
value: [0.77777778 0.66666667 0.875 0.94444444 0.81818182 0.63157895
|
|
0.85 0.73913043 0.58333333 0.875 ]
|
|
|
|
mean value: 0.776111342255507
|
|
|
|
key: train_precision
|
|
value: [0.88586957 0.88333333 0.87362637 0.86263736 0.85561497 0.87165775
|
|
0.85340314 0.86413043 0.88172043 0.87700535]
|
|
|
|
mean value: 0.8708998715932164
|
|
|
|
key: test_recall
|
|
value: [0.63636364 0.72727273 0.63636364 0.77272727 0.81818182 0.57142857
|
|
0.80952381 0.80952381 0.66666667 0.66666667]
|
|
|
|
mean value: 0.7114718614718615
|
|
|
|
key: train_recall
|
|
value: [0.84455959 0.8238342 0.8238342 0.8134715 0.82901554 0.84020619
|
|
0.84020619 0.81958763 0.84536082 0.84536082]
|
|
|
|
mean value: 0.8325436675391272
|
|
|
|
key: test_roc_auc
|
|
value: [0.72294372 0.67316017 0.77056277 0.86255411 0.81385281 0.62662338
|
|
0.83658009 0.76839827 0.60606061 0.78787879]
|
|
|
|
mean value: 0.7468614718614719
|
|
|
|
key: train_roc_auc
|
|
value: [0.86815608 0.85779339 0.85263875 0.84230276 0.84492014 0.85792693
|
|
0.84756423 0.84502698 0.86568559 0.86309492]
|
|
|
|
mean value: 0.8545109769777256
|
|
|
|
key: test_jcc
|
|
value: [0.53846154 0.53333333 0.58333333 0.73913043 0.69230769 0.42857143
|
|
0.70833333 0.62962963 0.4516129 0.60869565]
|
|
|
|
mean value: 0.5913409279152617
|
|
|
|
key: train_jcc
|
|
value: [0.76168224 0.74299065 0.73611111 0.72018349 0.72727273 0.74770642
|
|
0.73423423 0.7260274 0.75925926 0.75576037]
|
|
|
|
mean value: 0.7411227903254343
|
|
|
|
MCC on Blind test: 0.32
|
|
|
|
Accuracy on Blind test: 0.67
|
|
|
|
Model_name: MLP
|
|
Model func: MLPClassifier(max_iter=500, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MLPClassifier(max_iter=500, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [3.97078109 3.97416925 2.70587897 4.44103217 4.18598366 3.85128641
|
|
4.23357773 3.41070867 2.87078166 1.80410576]
|
|
|
|
mean value: 3.5448305368423463
|
|
|
|
key: score_time
|
|
value: [0.02938771 0.0144546 0.02191424 0.02190518 0.02358532 0.02501225
|
|
0.01258945 0.03111053 0.01438427 0.01988482]
|
|
|
|
mean value: 0.021422839164733885
|
|
|
|
key: test_mcc
|
|
value: [0.54609991 0.35185603 0.49456394 0.67532468 0.58134627 0.34848485
|
|
0.72077922 0.32463131 0.3030303 0.74914918]
|
|
|
|
mean value: 0.5095265685481636
|
|
|
|
key: train_mcc
|
|
value: [0.9638374 0.97427611 0.96899204 0.98450896 0.95865605 0.9741727
|
|
0.98450937 0.9741727 0.97417339 0.96899204]
|
|
|
|
mean value: 0.9726290770221656
|
|
|
|
key: test_accuracy
|
|
value: [0.76744186 0.6744186 0.74418605 0.8372093 0.79069767 0.6744186
|
|
0.86046512 0.65116279 0.65116279 0.86046512]
|
|
|
|
mean value: 0.7511627906976744
|
|
|
|
key: train_accuracy
|
|
value: [0.98191214 0.9870801 0.98449612 0.99224806 0.97932817 0.9870801
|
|
0.99224806 0.9870801 0.9870801 0.98449612]
|
|
|
|
mean value: 0.9863049095607235
|
|
|
|
key: test_fscore
|
|
value: [0.75 0.70833333 0.73170732 0.8372093 0.8 0.66666667
|
|
0.85714286 0.69387755 0.65116279 0.83333333]
|
|
|
|
mean value: 0.7529433151593025
|
|
|
|
key: train_fscore
|
|
value: [0.98191214 0.98694517 0.98445596 0.99220779 0.97927461 0.98714653
|
|
0.99224806 0.98714653 0.9870801 0.98453608]
|
|
|
|
mean value: 0.9862952983546482
|
|
|
|
key: test_precision
|
|
value: [0.83333333 0.65384615 0.78947368 0.85714286 0.7826087 0.66666667
|
|
0.85714286 0.60714286 0.63636364 1. ]
|
|
|
|
mean value: 0.7683720741501062
|
|
|
|
key: train_precision
|
|
value: [0.97938144 0.99473684 0.98445596 0.99479167 0.97927461 0.98461538
|
|
0.99481865 0.98461538 0.98963731 0.98453608]
|
|
|
|
mean value: 0.9870863332273304
|
|
|
|
key: test_recall
|
|
value: [0.68181818 0.77272727 0.68181818 0.81818182 0.81818182 0.66666667
|
|
0.85714286 0.80952381 0.66666667 0.71428571]
|
|
|
|
mean value: 0.7487012987012986
|
|
|
|
key: train_recall
|
|
value: [0.98445596 0.97927461 0.98445596 0.98963731 0.97927461 0.98969072
|
|
0.98969072 0.98969072 0.98453608 0.98453608]
|
|
|
|
mean value: 0.9855242775492762
|
|
|
|
key: test_roc_auc
|
|
value: [0.76948052 0.67207792 0.745671 0.83766234 0.79004329 0.67424242
|
|
0.86038961 0.6547619 0.65151515 0.85714286]
|
|
|
|
mean value: 0.7512987012987014
|
|
|
|
key: train_roc_auc
|
|
value: [0.9819187 0.98705999 0.98449602 0.99224133 0.97932803 0.98707334
|
|
0.99225469 0.98707334 0.98708669 0.98449602]
|
|
|
|
mean value: 0.9863028150205652
|
|
|
|
key: test_jcc
|
|
value: [0.6 0.5483871 0.57692308 0.72 0.66666667 0.5
|
|
0.75 0.53125 0.48275862 0.71428571]
|
|
|
|
mean value: 0.6090271175339307
|
|
|
|
key: train_jcc
|
|
value: [0.96446701 0.9742268 0.96938776 0.98453608 0.95939086 0.97461929
|
|
0.98461538 0.97461929 0.9744898 0.96954315]
|
|
|
|
mean value: 0.972989541614236
|
|
|
|
MCC on Blind test: 0.29
|
|
|
|
Accuracy on Blind test: 0.64
|
|
|
|
Model_name: Decision Tree
|
|
Model func: DecisionTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', DecisionTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02931452 0.03400421 0.0351491 0.02511573 0.02355576 0.0299964
|
|
0.02605271 0.02237582 0.02702141 0.0285213 ]
|
|
|
|
mean value: 0.028110694885253907
|
|
|
|
key: score_time
|
|
value: [0.0128603 0.01022124 0.00969696 0.01189399 0.010005 0.00916767
|
|
0.00931692 0.01336789 0.00971723 0.00991654]
|
|
|
|
mean value: 0.010616374015808106
|
|
|
|
key: test_mcc
|
|
value: [0.44468651 0.34848485 0.44468651 0.54609991 0.81701092 0.21351219
|
|
0.723327 0.58824786 0.53463203 0.35868355]
|
|
|
|
mean value: 0.5019371317009578
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.72093023 0.6744186 0.72093023 0.76744186 0.90697674 0.60465116
|
|
0.86046512 0.79069767 0.76744186 0.6744186 ]
|
|
|
|
mean value: 0.7488372093023256
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.71428571 0.68181818 0.71428571 0.75 0.91304348 0.62222222
|
|
0.85 0.8 0.76190476 0.61111111]
|
|
|
|
mean value: 0.7418671183888574
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.75 0.68181818 0.75 0.83333333 0.875 0.58333333
|
|
0.89473684 0.75 0.76190476 0.73333333]
|
|
|
|
mean value: 0.7613459785828207
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.68181818 0.68181818 0.68181818 0.68181818 0.95454545 0.66666667
|
|
0.80952381 0.85714286 0.76190476 0.52380952]
|
|
|
|
mean value: 0.7300865800865801
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.72186147 0.67424242 0.72186147 0.76948052 0.90584416 0.60606061
|
|
0.85930736 0.79220779 0.76731602 0.67099567]
|
|
|
|
mean value: 0.7489177489177489
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.55555556 0.51724138 0.55555556 0.6 0.84 0.4516129
|
|
0.73913043 0.66666667 0.61538462 0.44 ]
|
|
|
|
mean value: 0.5981147110481153
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.4
|
|
|
|
Accuracy on Blind test: 0.7
|
|
|
|
Model_name: Extra Trees
|
|
Model func: ExtraTreesClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreesClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.13478565 0.13071537 0.1360302 0.13517666 0.13477015 0.13016009
|
|
0.14609933 0.13371921 0.34426522 0.34663653]
|
|
|
|
mean value: 0.17723584175109863
|
|
|
|
key: score_time
|
|
value: [0.0183773 0.02117014 0.01810002 0.0189991 0.01942897 0.02047467
|
|
0.02483344 0.01833344 0.02377844 0.02443361]
|
|
|
|
mean value: 0.02079291343688965
|
|
|
|
key: test_mcc
|
|
value: [0.49456394 0.34848485 0.54609991 0.82901914 0.58134627 0.30151915
|
|
0.72077922 0.53796222 0.44155844 0.4912706 ]
|
|
|
|
mean value: 0.529260373056181
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.74418605 0.6744186 0.76744186 0.90697674 0.79069767 0.65116279
|
|
0.86046512 0.76744186 0.72093023 0.74418605]
|
|
|
|
mean value: 0.7627906976744185
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.73170732 0.68181818 0.75 0.9 0.8 0.63414634
|
|
0.85714286 0.77272727 0.71428571 0.71794872]
|
|
|
|
mean value: 0.755977640245933
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.78947368 0.68181818 0.83333333 1. 0.7826087 0.65
|
|
0.85714286 0.73913043 0.71428571 0.77777778]
|
|
|
|
mean value: 0.7825570679003173
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.68181818 0.68181818 0.68181818 0.81818182 0.81818182 0.61904762
|
|
0.85714286 0.80952381 0.71428571 0.66666667]
|
|
|
|
mean value: 0.7348484848484849
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.745671 0.67424242 0.76948052 0.90909091 0.79004329 0.6504329
|
|
0.86038961 0.76839827 0.72077922 0.74242424]
|
|
|
|
mean value: 0.763095238095238
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.57692308 0.51724138 0.6 0.81818182 0.66666667 0.46428571
|
|
0.75 0.62962963 0.55555556 0.56 ]
|
|
|
|
mean value: 0.6138483840552806
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.45
|
|
|
|
Accuracy on Blind test: 0.73
|
|
|
|
Model_name: Extra Tree
|
|
Model func: ExtraTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01684761 0.01725125 0.03084469 0.03082466 0.03178549 0.01522112
|
|
0.01549125 0.01581836 0.04029131 0.02556753]
|
|
|
|
mean value: 0.0239943265914917
|
|
|
|
key: score_time
|
|
value: [0.01382542 0.0134089 0.02095604 0.0213182 0.01256824 0.0129602
|
|
0.01235056 0.02648115 0.02352309 0.01250362]
|
|
|
|
mean value: 0.016989541053771973
|
|
|
|
key: test_mcc
|
|
value: [0.3030303 0.25490741 0.20995671 0.49456394 0.58134627 0.11982827
|
|
0.58225108 0.16485939 0.34859132 0.06695322]
|
|
|
|
mean value: 0.31262879090530155
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.65116279 0.62790698 0.60465116 0.74418605 0.79069767 0.55813953
|
|
0.79069767 0.58139535 0.6744186 0.53488372]
|
|
|
|
mean value: 0.6558139534883721
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.65116279 0.65217391 0.60465116 0.73170732 0.8 0.57777778
|
|
0.79069767 0.59090909 0.65 0.375 ]
|
|
|
|
mean value: 0.6424079726710494
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.66666667 0.625 0.61904762 0.78947368 0.7826087 0.54166667
|
|
0.77272727 0.56521739 0.68421053 0.54545455]
|
|
|
|
mean value: 0.6592073068045607
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.63636364 0.68181818 0.59090909 0.68181818 0.81818182 0.61904762
|
|
0.80952381 0.61904762 0.61904762 0.28571429]
|
|
|
|
mean value: 0.6361471861471861
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.65151515 0.62662338 0.60497835 0.745671 0.79004329 0.55952381
|
|
0.79112554 0.58225108 0.67316017 0.52922078]
|
|
|
|
mean value: 0.6554112554112554
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.48275862 0.48387097 0.43333333 0.57692308 0.66666667 0.40625
|
|
0.65384615 0.41935484 0.48148148 0.23076923]
|
|
|
|
mean value: 0.4835254370161211
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.24
|
|
|
|
Accuracy on Blind test: 0.61
|
|
|
|
Model_name: Random Forest
|
|
Model func: RandomForestClassifier(n_estimators=1000, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(n_estimators=1000, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [2.97463274 1.71187091 1.76081896 1.68633628 1.68973565 2.1227293
|
|
4.62051916 2.92394114 2.63578987 2.66768122]
|
|
|
|
mean value: 2.4794055223464966
|
|
|
|
key: score_time
|
|
value: [0.09138441 0.09405994 0.09149313 0.09162092 0.09041667 0.24971867
|
|
0.25352859 0.14864159 0.12520003 0.1521244 ]
|
|
|
|
mean value: 0.1388188362121582
|
|
|
|
key: test_mcc
|
|
value: [0.58824786 0.40291148 0.67532468 0.61748053 0.72077922 0.34859132
|
|
0.81701092 0.73471273 0.53463203 0.63732414]
|
|
|
|
mean value: 0.6077014905508663
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.79069767 0.69767442 0.8372093 0.79069767 0.86046512 0.6744186
|
|
0.90697674 0.86046512 0.76744186 0.81395349]
|
|
|
|
mean value: 0.8
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.7804878 0.73469388 0.8372093 0.75675676 0.86363636 0.65
|
|
0.9 0.86956522 0.76190476 0.78947368]
|
|
|
|
mean value: 0.7943727768654364
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.84210526 0.66666667 0.85714286 0.93333333 0.86363636 0.68421053
|
|
0.94736842 0.8 0.76190476 0.88235294]
|
|
|
|
mean value: 0.8238721134386768
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.72727273 0.81818182 0.81818182 0.63636364 0.86363636 0.61904762
|
|
0.85714286 0.95238095 0.76190476 0.71428571]
|
|
|
|
mean value: 0.7768398268398269
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.79220779 0.69480519 0.83766234 0.79437229 0.86038961 0.67316017
|
|
0.90584416 0.86255411 0.76731602 0.81168831]
|
|
|
|
mean value: 0.8
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.64 0.58064516 0.72 0.60869565 0.76 0.48148148
|
|
0.81818182 0.76923077 0.61538462 0.65217391]
|
|
|
|
mean value: 0.6645793410786398
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.39
|
|
|
|
Accuracy on Blind test: 0.7
|
|
|
|
Model_name: Random Forest2
|
|
Model func: RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'Z...05', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10,
|
|
oob_score=True, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.72556663 1.91589737 1.26766062 1.03932428 0.95912004 1.64210296
|
|
1.90016818 2.0203793 1.93958759 1.73655081]
|
|
|
|
mean value: 1.6146357774734497
|
|
|
|
key: score_time
|
|
value: [0.17945695 0.17857099 0.13251996 0.22311735 0.22053123 0.17586827
|
|
0.22016335 0.18613434 0.22368526 0.18402171]
|
|
|
|
mean value: 0.19240694046020507
|
|
|
|
key: test_mcc
|
|
value: [0.53463203 0.45629995 0.76839827 0.61748053 0.76789769 0.39696419
|
|
0.81701092 0.69486034 0.48807056 0.53595916]
|
|
|
|
mean value: 0.6077573644581136
|
|
|
|
key: train_mcc
|
|
value: [0.90182148 0.88142257 0.90717492 0.88114951 0.8914855 0.89664014
|
|
0.89158365 0.8914855 0.89158365 0.88123732]
|
|
|
|
mean value: 0.8915584237957668
|
|
|
|
key: test_accuracy
|
|
value: [0.76744186 0.72093023 0.88372093 0.79069767 0.88372093 0.69767442
|
|
0.90697674 0.8372093 0.74418605 0.76744186]
|
|
|
|
mean value: 0.8
|
|
|
|
key: train_accuracy
|
|
value: [0.95090439 0.94056848 0.95348837 0.94056848 0.94573643 0.94832041
|
|
0.94573643 0.94573643 0.94573643 0.94056848]
|
|
|
|
mean value: 0.9457364341085271
|
|
|
|
key: test_fscore
|
|
value: [0.77272727 0.76 0.88372093 0.75675676 0.88888889 0.66666667
|
|
0.9 0.85106383 0.73170732 0.75 ]
|
|
|
|
mean value: 0.7961531662132548
|
|
|
|
key: train_fscore
|
|
value: [0.95090439 0.93963255 0.95384615 0.94056848 0.94573643 0.94845361
|
|
0.94545455 0.94573643 0.94545455 0.94117647]
|
|
|
|
mean value: 0.945696360595677
|
|
|
|
key: test_precision
|
|
value: [0.77272727 0.67857143 0.9047619 0.93333333 0.86956522 0.72222222
|
|
0.94736842 0.76923077 0.75 0.78947368]
|
|
|
|
mean value: 0.8137254253501394
|
|
|
|
key: train_precision
|
|
value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
[0.94845361 0.95212766 0.94416244 0.93814433 0.94329897 0.94845361
|
|
0.95287958 0.94818653 0.95287958 0.93401015]
|
|
|
|
mean value: 0.9462596454671948
|
|
|
|
key: test_recall
|
|
value: [0.77272727 0.86363636 0.86363636 0.63636364 0.90909091 0.61904762
|
|
0.85714286 0.95238095 0.71428571 0.71428571]
|
|
|
|
mean value: 0.7902597402597402
|
|
|
|
key: train_recall
|
|
value: [0.95336788 0.92746114 0.96373057 0.94300518 0.94818653 0.94845361
|
|
0.93814433 0.94329897 0.93814433 0.94845361]
|
|
|
|
mean value: 0.9452246140697612
|
|
|
|
key: test_roc_auc
|
|
value: [0.76731602 0.71753247 0.88419913 0.79437229 0.88311688 0.69588745
|
|
0.90584416 0.83982684 0.74350649 0.76623377]
|
|
|
|
mean value: 0.7997835497835498
|
|
|
|
key: train_roc_auc
|
|
value: [0.95091074 0.94053469 0.95351477 0.94057476 0.94574275 0.94832007
|
|
0.9457561 0.94574275 0.9457561 0.94054805]
|
|
|
|
mean value: 0.945740077987287
|
|
|
|
key: test_jcc
|
|
value: [0.62962963 0.61290323 0.79166667 0.60869565 0.8 0.5
|
|
0.81818182 0.74074074 0.57692308 0.6 ]
|
|
|
|
mean value: 0.6678740810122297
|
|
|
|
key: train_jcc
|
|
value: [0.90640394 0.88613861 0.91176471 0.88780488 0.89705882 0.90196078
|
|
0.89655172 0.89705882 0.89655172 0.88888889]
|
|
|
|
mean value: 0.897018290721652
|
|
|
|
MCC on Blind test: 0.45
|
|
|
|
Accuracy on Blind test: 0.73
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.03860426 0.03471041 0.02413154 0.01475096 0.01522779 0.01514721
|
|
0.01484656 0.01524067 0.01517296 0.0147686 ]
|
|
|
|
mean value: 0.020260095596313477
|
|
|
|
key: score_time
|
|
value: [0.02451944 0.02949691 0.01293755 0.01273441 0.01294351 0.01272106
|
|
0.01293421 0.01299977 0.01297522 0.01269722]
|
|
|
|
mean value: 0.015695929527282715
|
|
|
|
key: test_mcc
|
|
value: [0.4517935 0.02169203 0.40939224 0.4517935 0.35185603 0.11496773
|
|
0.81385281 0.44468651 0.2270149 0.30265778]
|
|
|
|
mean value: 0.3589707032809893
|
|
|
|
key: train_mcc
|
|
value: [0.41603013 0.4677316 0.4367638 0.44185674 0.44703809 0.44185674
|
|
0.43671867 0.44252245 0.43195073 0.45219272]
|
|
|
|
mean value: 0.44146616665156013
|
|
|
|
key: test_accuracy
|
|
value: [0.72093023 0.51162791 0.69767442 0.72093023 0.6744186 0.55813953
|
|
0.90697674 0.72093023 0.60465116 0.65116279]
|
|
|
|
mean value: 0.6767441860465117
|
|
|
|
key: train_accuracy
|
|
value: [0.70801034 0.73385013 0.71834625 0.72093023 0.72351421 0.72093023
|
|
0.71834625 0.72093023 0.71576227 0.72609819]
|
|
|
|
mean value: 0.720671834625323
|
|
|
|
key: test_fscore
|
|
value: [0.7 0.53333333 0.66666667 0.7 0.70833333 0.53658537
|
|
0.9047619 0.72727273 0.65306122 0.61538462]
|
|
|
|
mean value: 0.6745399171096035
|
|
|
|
key: train_fscore
|
|
value: [0.70801034 0.7310705 0.71979434 0.72020725 0.72351421 0.72164948
|
|
0.72122762 0.71428571 0.71052632 0.72680412]
|
|
|
|
mean value: 0.7197089902052173
|
|
|
|
key: test_precision
|
|
value: [0.77777778 0.52173913 0.76470588 0.77777778 0.65384615 0.55
|
|
0.9047619 0.69565217 0.57142857 0.66666667]
|
|
|
|
mean value: 0.6884356038959619
|
|
|
|
key: train_precision
|
|
value: [0.70618557 0.73684211 0.71428571 0.72020725 0.72164948 0.72164948
|
|
0.71573604 0.73369565 0.72580645 0.72680412]
|
|
|
|
mean value: 0.722286187762465
|
|
|
|
key: test_recall
|
|
value: [0.63636364 0.54545455 0.59090909 0.63636364 0.77272727 0.52380952
|
|
0.9047619 0.76190476 0.76190476 0.57142857]
|
|
|
|
mean value: 0.6705627705627706
|
|
|
|
key: train_recall
|
|
value: [0.70984456 0.7253886 0.7253886 0.72020725 0.7253886 0.72164948
|
|
0.72680412 0.69587629 0.69587629 0.72680412]
|
|
|
|
mean value: 0.7173227925858662
|
|
|
|
key: test_roc_auc
|
|
value: [0.72294372 0.51082251 0.70021645 0.72294372 0.67207792 0.55735931
|
|
0.90692641 0.72186147 0.60822511 0.64935065]
|
|
|
|
mean value: 0.6772727272727272
|
|
|
|
key: train_roc_auc
|
|
value: [0.70801506 0.73382832 0.7183644 0.72092837 0.72351904 0.72092837
|
|
0.71832434 0.72099514 0.71581379 0.72609636]
|
|
|
|
mean value: 0.7206813204422841
|
|
|
|
key: test_jcc
|
|
value: [0.53846154 0.36363636 0.5 0.53846154 0.5483871 0.36666667
|
|
0.82608696 0.57142857 0.48484848 0.44444444]
|
|
|
|
mean value: 0.518242166124354
|
|
|
|
key: train_jcc
|
|
value: [0.548 0.57613169 0.562249 0.56275304 0.56680162 0.56451613
|
|
0.564 0.55555556 0.55102041 0.5708502 ]
|
|
|
|
mean value: 0.5621877634277408
|
|
|
|
MCC on Blind test: 0.32
|
|
|
|
Accuracy on Blind test: 0.67
|
|
|
|
Model_name: XGBoost
|
|
Model func: XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'Z...
|
|
interaction_constraints=None, learning_rate=None,
|
|
max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan,
|
|
monotone_constraints=None, n_estimators=100,
|
|
n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None,
|
|
reg_lambda=None, scale_pos_weight=None,
|
|
subsample=None, tree_method=None,
|
|
use_label_encoder=False,
|
|
validate_parameters=None, verbosity=0))])
|
|
|
|
key: fit_time
|
|
value: [5.84490728 7.99645638 8.01863933 8.01061749 6.95634055 3.86555934
|
|
2.4676311 2.49709249 2.40035462 2.49079871]
|
|
|
|
mean value: 5.054839730262756
|
|
|
|
key: score_time
|
|
value: [0.01869607 0.02224588 0.02842999 0.02292418 0.0223074 0.01271367
|
|
0.01389503 0.01313758 0.01270461 0.01333499]
|
|
|
|
mean value: 0.0180389404296875
|
|
|
|
key: test_mcc
|
|
value: [0.53463203 0.53595916 0.76839827 0.61748053 0.9544491 0.44468651
|
|
0.90692641 0.68193178 0.62770563 0.77418983]
|
|
|
|
mean value: 0.6846359249520138
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.76744186 0.76744186 0.88372093 0.79069767 0.97674419 0.72093023
|
|
0.95348837 0.8372093 0.81395349 0.88372093]
|
|
|
|
mean value: 0.8395348837209302
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.77272727 0.7826087 0.88372093 0.75675676 0.97777778 0.72727273
|
|
0.95238095 0.84444444 0.80952381 0.87179487]
|
|
|
|
mean value: 0.8379008238563345
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.77272727 0.75 0.9047619 0.93333333 0.95652174 0.69565217
|
|
0.95238095 0.79166667 0.80952381 0.94444444]
|
|
|
|
mean value: 0.8511012296881862
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.77272727 0.81818182 0.86363636 0.63636364 1. 0.76190476
|
|
0.95238095 0.9047619 0.80952381 0.80952381]
|
|
|
|
mean value: 0.8329004329004329
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.76731602 0.76623377 0.88419913 0.79437229 0.97619048 0.72186147
|
|
0.9534632 0.83874459 0.81385281 0.88203463]
|
|
|
|
mean value: 0.8398268398268398
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.62962963 0.64285714 0.79166667 0.60869565 0.95652174 0.57142857
|
|
0.90909091 0.73076923 0.68 0.77272727]
|
|
|
|
mean value: 0.7293386814473771
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.59
|
|
|
|
Accuracy on Blind test: 0.79
|
|
|
|
Model_name: LDA
|
|
Model func: LinearDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LinearDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.04299521 0.08748269 0.06289506 0.08957219 0.08889985 0.09818339
|
|
0.08479619 0.08244371 0.0967896 0.08396053]
|
|
|
|
mean value: 0.08180184364318847
|
|
|
|
key: score_time
|
|
value: [0.01770806 0.02164626 0.01231289 0.02033544 0.02275109 0.01549911
|
|
0.02078938 0.02091646 0.0211823 0.02512574]
|
|
|
|
mean value: 0.019826674461364747
|
|
|
|
key: test_mcc
|
|
value: [0.44468651 0.26856633 0.36709713 0.67532468 0.58557701 0.06753957
|
|
0.67532468 0.54609991 0.34848485 0.53595916]
|
|
|
|
mean value: 0.45146598115905806
|
|
|
|
key: train_mcc
|
|
value: [0.78838964 0.80887382 0.79846162 0.75718561 0.75711768 0.84496021
|
|
0.7726435 0.78310005 0.82954911 0.76744745]
|
|
|
|
mean value: 0.7907728681165043
|
|
|
|
key: test_accuracy
|
|
value: [0.72093023 0.62790698 0.6744186 0.8372093 0.79069767 0.53488372
|
|
0.8372093 0.76744186 0.6744186 0.76744186]
|
|
|
|
mean value: 0.7232558139534884
|
|
|
|
key: train_accuracy
|
|
value: [0.89405685 0.90439276 0.89922481 0.87855297 0.87855297 0.92248062
|
|
0.88630491 0.89147287 0.91472868 0.88372093]
|
|
|
|
mean value: 0.8953488372093024
|
|
|
|
key: test_fscore
|
|
value: [0.71428571 0.69230769 0.63157895 0.8372093 0.80851064 0.5
|
|
0.8372093 0.7826087 0.66666667 0.75 ]
|
|
|
|
mean value: 0.7220376959229704
|
|
|
|
key: train_fscore
|
|
value: [0.89514066 0.90339426 0.89922481 0.8772846 0.87855297 0.92268041
|
|
0.88717949 0.89285714 0.91560102 0.88431877]
|
|
|
|
mean value: 0.8956234125406854
|
|
|
|
key: test_precision
|
|
value: [0.75 0.6 0.75 0.85714286 0.76 0.52631579
|
|
0.81818182 0.72 0.66666667 0.78947368]
|
|
|
|
mean value: 0.7237780815675552
|
|
|
|
key: train_precision
|
|
value: [0.88383838 0.91052632 0.89690722 0.88421053 0.87628866 0.92268041
|
|
0.88265306 0.88383838 0.90862944 0.88205128]
|
|
|
|
mean value: 0.8931623683341963
|
|
|
|
key: test_recall
|
|
value: [0.68181818 0.81818182 0.54545455 0.81818182 0.86363636 0.47619048
|
|
0.85714286 0.85714286 0.66666667 0.71428571]
|
|
|
|
mean value: 0.7298701298701299
|
|
|
|
key: train_recall
|
|
value: [0.90673575 0.89637306 0.9015544 0.87046632 0.88082902 0.92268041
|
|
0.89175258 0.90206186 0.92268041 0.88659794]
|
|
|
|
mean value: 0.8981731745099086
|
|
|
|
key: test_roc_auc
|
|
value: [0.72186147 0.62337662 0.67748918 0.83766234 0.78896104 0.53354978
|
|
0.83766234 0.76948052 0.67424242 0.76623377]
|
|
|
|
mean value: 0.7230519480519481
|
|
|
|
key: train_roc_auc
|
|
value: [0.89408953 0.9043721 0.89923081 0.87853213 0.87855884 0.9224801
|
|
0.8862908 0.89144544 0.91470808 0.88371348]
|
|
|
|
mean value: 0.8953421291597671
|
|
|
|
key: test_jcc
|
|
value: [0.55555556 0.52941176 0.46153846 0.72 0.67857143 0.33333333
|
|
0.72 0.64285714 0.5 0.6 ]
|
|
|
|
mean value: 0.5741267686561804
|
|
|
|
key: train_jcc
|
|
value: [0.81018519 0.82380952 0.81690141 0.78139535 0.78341014 0.85645933
|
|
0.79723502 0.80645161 0.84433962 0.79262673]
|
|
|
|
mean value: 0.811281392137182
|
|
|
|
MCC on Blind test: 0.39
|
|
|
|
Accuracy on Blind test: 0.7
|
|
|
|
Model_name: Multinomial
|
|
Model func: MultinomialNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MultinomialNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01140189 0.01118898 0.01079679 0.01073027 0.01068997 0.0107522
|
|
0.01070142 0.01034212 0.00967431 0.01074028]
|
|
|
|
mean value: 0.010701823234558105
|
|
|
|
key: score_time
|
|
value: [0.01017404 0.00979447 0.00948381 0.00952983 0.00951076 0.00952482
|
|
0.00906324 0.0088284 0.008775 0.00955725]
|
|
|
|
mean value: 0.009424161911010743
|
|
|
|
key: test_mcc
|
|
value: [ 0.68193178 -0.03178209 0.53463203 0.34848485 0.31401826 0.30151915
|
|
0.71509694 0.38097804 0.23794034 0.16485939]
|
|
|
|
mean value: 0.3647678693597039
|
|
|
|
key: train_mcc
|
|
value: [0.35534379 0.42731074 0.35464096 0.38612423 0.39519578 0.38066533
|
|
0.35342511 0.38859133 0.42122023 0.40584961]
|
|
|
|
mean value: 0.38683671255257734
|
|
|
|
key: test_accuracy
|
|
value: [0.8372093 0.48837209 0.76744186 0.6744186 0.65116279 0.65116279
|
|
0.8372093 0.6744186 0.60465116 0.58139535]
|
|
|
|
mean value: 0.6767441860465117
|
|
|
|
key: train_accuracy
|
|
value: [0.6744186 0.71059432 0.6744186 0.68992248 0.69509044 0.6873385
|
|
0.6744186 0.69250646 0.70801034 0.7002584 ]
|
|
|
|
mean value: 0.6906976744186046
|
|
|
|
key: test_fscore
|
|
value: [0.82926829 0.56 0.77272727 0.68181818 0.70588235 0.63414634
|
|
0.85714286 0.72 0.66666667 0.59090909]
|
|
|
|
mean value: 0.7018561056351588
|
|
|
|
key: train_fscore
|
|
value: [0.7014218 0.73205742 0.7 0.71428571 0.71634615 0.71394799
|
|
0.7 0.71325301 0.73031026 0.72380952]
|
|
|
|
mean value: 0.7145431874278962
|
|
|
|
key: test_precision
|
|
value: [0.89473684 0.5 0.77272727 0.68181818 0.62068966 0.65
|
|
0.75 0.62068966 0.56666667 0.56521739]
|
|
|
|
mean value: 0.6622545664966559
|
|
|
|
key: train_precision
|
|
value: [0.64628821 0.68 0.64757709 0.66079295 0.66816143 0.65938865
|
|
0.65044248 0.66968326 0.68 0.67256637]
|
|
|
|
mean value: 0.6634900442401712
|
|
|
|
key: test_recall
|
|
value: [0.77272727 0.63636364 0.77272727 0.68181818 0.81818182 0.61904762
|
|
1. 0.85714286 0.80952381 0.61904762]
|
|
|
|
mean value: 0.7586580086580087
|
|
|
|
key: train_recall
|
|
value: [0.76683938 0.79274611 0.76165803 0.77720207 0.77202073 0.77835052
|
|
0.75773196 0.7628866 0.78865979 0.78350515]
|
|
|
|
mean value: 0.774160034186208
|
|
|
|
key: test_roc_auc
|
|
value: [0.83874459 0.48484848 0.76731602 0.67424242 0.64718615 0.6504329
|
|
0.84090909 0.67857143 0.60930736 0.58225108]
|
|
|
|
mean value: 0.6773809523809524
|
|
|
|
key: train_roc_auc
|
|
value: [0.6746568 0.71080605 0.67464345 0.69014743 0.69528871 0.68710272
|
|
0.67420277 0.69232413 0.7078014 0.70004273]
|
|
|
|
mean value: 0.690701618503285
|
|
|
|
key: test_jcc
|
|
value: [0.70833333 0.38888889 0.62962963 0.51724138 0.54545455 0.46428571
|
|
0.75 0.5625 0.5 0.41935484]
|
|
|
|
mean value: 0.5485688329612134
|
|
|
|
key: train_jcc
|
|
value: [0.54014599 0.57735849 0.53846154 0.55555556 0.55805243 0.55514706
|
|
0.53846154 0.55430712 0.57518797 0.56716418]
|
|
|
|
mean value: 0.5559841866860746
|
|
|
|
MCC on Blind test: 0.26
|
|
|
|
Accuracy on Blind test: 0.64
|
|
|
|
Model_name: Passive Aggresive
|
|
Model func: PassiveAggressiveClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
PassiveAggressiveClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01252246 0.01743793 0.01833606 0.03522992 0.01919341 0.02123332
|
|
0.02283287 0.01666498 0.02263021 0.02485061]
|
|
|
|
mean value: 0.021093177795410156
|
|
|
|
key: score_time
|
|
value: [0.00881982 0.011379 0.01184511 0.01217556 0.01201034 0.01208806
|
|
0.01207376 0.01197124 0.01207709 0.01200581]
|
|
|
|
mean value: 0.011644577980041504
|
|
|
|
key: test_mcc
|
|
value: [0.3543982 0.32779278 0.49347 0.63123793 0.369787 0.30666041
|
|
0.67988342 0.39343507 0.29669666 0.48934219]
|
|
|
|
mean value: 0.4342703657405515
|
|
|
|
key: train_mcc
|
|
value: [0.33082727 0.45943233 0.48092172 0.71620269 0.49709833 0.73700447
|
|
0.6802225 0.47378378 0.70801498 0.43564422]
|
|
|
|
mean value: 0.551915229904182
|
|
|
|
key: test_accuracy
|
|
value: [0.60465116 0.60465116 0.72093023 0.81395349 0.65116279 0.65116279
|
|
0.8372093 0.62790698 0.62790698 0.69767442]
|
|
|
|
mean value: 0.6837209302325582
|
|
|
|
key: train_accuracy
|
|
value: [0.5994832 0.67700258 0.6873385 0.85788114 0.69767442 0.8630491
|
|
0.8372093 0.68992248 0.84754522 0.65891473]
|
|
|
|
mean value: 0.7416020671834626
|
|
|
|
key: test_fscore
|
|
value: [0.37037037 0.72131148 0.77777778 0.80952381 0.73684211 0.59459459
|
|
0.82051282 0.72413793 0.69230769 0.55172414]
|
|
|
|
mean value: 0.6799102714725577
|
|
|
|
key: train_fscore
|
|
value: [0.32900433 0.75442043 0.76134122 0.85488127 0.76739563 0.85070423
|
|
0.82644628 0.76190476 0.86117647 0.484375 ]
|
|
|
|
mean value: 0.7251649615674208
|
|
|
|
key: test_precision
|
|
value: [1. 0.56410256 0.65625 0.85 0.6 0.6875
|
|
0.88888889 0.56756757 0.58064516 1. ]
|
|
|
|
mean value: 0.7394954181849343
|
|
|
|
key: train_precision
|
|
value: [1. 0.60759494 0.61464968 0.87096774 0.62258065 0.9378882
|
|
0.88757396 0.61935484 0.79220779 1. ]
|
|
|
|
mean value: 0.7952817799506573
|
|
|
|
key: test_recall
|
|
value: [0.22727273 1. 0.95454545 0.77272727 0.95454545 0.52380952
|
|
0.76190476 1. 0.85714286 0.38095238]
|
|
|
|
mean value: 0.7432900432900433
|
|
|
|
key: train_recall
|
|
value: [0.19689119 0.99481865 1. 0.83937824 1. 0.77835052
|
|
0.77319588 0.98969072 0.94329897 0.31958763]
|
|
|
|
mean value: 0.7835211794241761
|
|
|
|
key: test_roc_auc
|
|
value: [0.61363636 0.5952381 0.71536797 0.81493506 0.64393939 0.6482684
|
|
0.83549784 0.63636364 0.63311688 0.69047619]
|
|
|
|
mean value: 0.6826839826839827
|
|
|
|
key: train_roc_auc
|
|
value: [0.5984456 0.6778217 0.68814433 0.85783345 0.69845361 0.86326852
|
|
0.83737514 0.68914588 0.84729715 0.65979381]
|
|
|
|
mean value: 0.7417579189145879
|
|
|
|
key: test_jcc
|
|
value: [0.22727273 0.56410256 0.63636364 0.68 0.58333333 0.42307692
|
|
0.69565217 0.56756757 0.52941176 0.38095238]
|
|
|
|
mean value: 0.5287733071288059
|
|
|
|
key: train_jcc
|
|
value: [0.19689119 0.60567823 0.61464968 0.74654378 0.62258065 0.74019608
|
|
0.70422535 0.61538462 0.75619835 0.31958763]
|
|
|
|
mean value: 0.5921935552542208
|
|
|
|
MCC on Blind test: 0.24
|
|
|
|
Accuracy on Blind test: 0.61
|
|
|
|
Model_name: Stochastic GDescent
|
|
Model func: SGDClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SGDClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02301335 0.02788758 0.02236009 0.0314877 0.01695895 0.02369475
|
|
0.02375484 0.02350283 0.02108383 0.01976204]
|
|
|
|
mean value: 0.02335059642791748
|
|
|
|
key: score_time
|
|
value: [0.01013947 0.01235223 0.01203251 0.01211309 0.01198268 0.02014351
|
|
0.01239824 0.01431727 0.01251864 0.01279259]
|
|
|
|
mean value: 0.013079023361206055
|
|
|
|
key: test_mcc
|
|
value: [0.43082022 0.30666041 0.50454827 0.41223987 0.369787 0.46619277
|
|
0.8276362 0.43082022 0.31757311 0.67462198]
|
|
|
|
mean value: 0.4740900049490931
|
|
|
|
key: train_mcc
|
|
value: [0.55190616 0.76754187 0.76829956 0.41878762 0.5091592 0.62622568
|
|
0.67412433 0.62438608 0.6676895 0.71275592]
|
|
|
|
mean value: 0.6320875933190009
|
|
|
|
key: test_accuracy
|
|
value: [0.65116279 0.65116279 0.74418605 0.6744186 0.65116279 0.69767442
|
|
0.90697674 0.65116279 0.62790698 0.8372093 ]
|
|
|
|
mean value: 0.7093023255813954
|
|
|
|
key: train_accuracy
|
|
value: [0.73385013 0.88372093 0.88372093 0.64857881 0.70542636 0.7881137
|
|
0.82687339 0.78294574 0.81395349 0.84754522]
|
|
|
|
mean value: 0.7914728682170542
|
|
|
|
key: test_fscore
|
|
value: [0.48275862 0.69387755 0.71794872 0.75 0.73684211 0.75471698
|
|
0.89473684 0.73684211 0.7037037 0.82926829]
|
|
|
|
mean value: 0.7300694919809066
|
|
|
|
key: train_fscore
|
|
value: [0.6360424 0.88431877 0.88607595 0.7394636 0.772 0.82327586
|
|
0.80351906 0.8212766 0.84140969 0.86310905]
|
|
|
|
mean value: 0.8070490979544427
|
|
|
|
key: test_precision
|
|
value: [1. 0.62962963 0.82352941 0.61764706 0.6 0.625
|
|
1. 0.58333333 0.57575758 0.85 ]
|
|
|
|
mean value: 0.7304897009308774
|
|
|
|
key: train_precision
|
|
value: [1. 0.87755102 0.86633663 0.58662614 0.6286645 0.70740741
|
|
0.93197279 0.69927536 0.73461538 0.78481013]
|
|
|
|
mean value: 0.7817259359042723
|
|
|
|
key: test_recall
|
|
value: [0.31818182 0.77272727 0.63636364 0.95454545 0.95454545 0.95238095
|
|
0.80952381 1. 0.9047619 0.80952381]
|
|
|
|
mean value: 0.8112554112554112
|
|
|
|
key: train_recall
|
|
value: [0.46632124 0.89119171 0.90673575 1. 1. 0.98453608
|
|
0.70618557 0.99484536 0.98453608 0.95876289]
|
|
|
|
mean value: 0.8893114684044656
|
|
|
|
key: test_roc_auc
|
|
value: [0.65909091 0.6482684 0.74675325 0.66774892 0.64393939 0.7034632
|
|
0.9047619 0.65909091 0.63419913 0.83658009]
|
|
|
|
mean value: 0.7103896103896105
|
|
|
|
key: train_roc_auc
|
|
value: [0.73316062 0.88374018 0.88378025 0.64948454 0.70618557 0.78760483
|
|
0.82718605 0.78239677 0.81351156 0.84725709]
|
|
|
|
mean value: 0.7914307462208215
|
|
|
|
key: test_jcc
|
|
value: [0.31818182 0.53125 0.56 0.6 0.58333333 0.60606061
|
|
0.80952381 0.58333333 0.54285714 0.70833333]
|
|
|
|
mean value: 0.5842873376623376
|
|
|
|
key: train_jcc
|
|
value: [0.46632124 0.79262673 0.79545455 0.58662614 0.6286645 0.6996337
|
|
0.67156863 0.6967509 0.72623574 0.75918367]
|
|
|
|
mean value: 0.6823065796546106
|
|
|
|
MCC on Blind test: 0.41
|
|
|
|
Accuracy on Blind test: 0.7
|
|
|
|
Model_name: AdaBoost Classifier
|
|
Model func: AdaBoostClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', AdaBoostClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.19457316 0.17522335 0.17240524 0.16780663 0.16347218 0.16430736
|
|
0.16252875 0.16236234 0.16486979 0.16400599]
|
|
|
|
mean value: 0.16915547847747803
|
|
|
|
key: score_time
|
|
value: [0.01554251 0.01645303 0.01652455 0.01542091 0.01534796 0.0151906
|
|
0.01580358 0.01565909 0.01523995 0.01531696]
|
|
|
|
mean value: 0.015649914741516113
|
|
|
|
key: test_mcc
|
|
value: [0.54609991 0.20824344 0.53796222 0.4517935 0.81385281 0.34848485
|
|
0.81385281 0.4633482 0.35141081 0.76789769]
|
|
|
|
mean value: 0.5302946234932158
|
|
|
|
key: train_mcc
|
|
value: [0.92259409 0.93803584 0.92769572 0.96383644 0.93798408 0.94832007
|
|
0.90716502 0.92249139 0.95360082 0.91731211]
|
|
|
|
mean value: 0.9339035586204503
|
|
|
|
key: test_accuracy
|
|
value: [0.76744186 0.60465116 0.76744186 0.72093023 0.90697674 0.6744186
|
|
0.90697674 0.72093023 0.6744186 0.88372093]
|
|
|
|
mean value: 0.7627906976744185
|
|
|
|
key: train_accuracy
|
|
value: [0.96124031 0.96899225 0.96382429 0.98191214 0.96899225 0.97416021
|
|
0.95348837 0.96124031 0.97674419 0.95865633]
|
|
|
|
mean value: 0.9669250645994832
|
|
|
|
key: test_fscore
|
|
value: [0.75 0.62222222 0.76190476 0.7 0.90909091 0.66666667
|
|
0.9047619 0.75 0.68181818 0.87804878]
|
|
|
|
mean value: 0.7624513426952452
|
|
|
|
key: train_fscore
|
|
value: [0.96143959 0.96907216 0.96354167 0.98181818 0.96891192 0.9742268
|
|
0.95408163 0.96143959 0.9769821 0.95876289]
|
|
|
|
mean value: 0.9670276528471051
|
|
|
|
key: test_precision
|
|
value: [0.83333333 0.60869565 0.8 0.77777778 0.90909091 0.66666667
|
|
0.9047619 0.66666667 0.65217391 0.9 ]
|
|
|
|
mean value: 0.7719166823514649
|
|
|
|
key: train_precision
|
|
value: [0.95408163 0.96410256 0.96858639 0.984375 0.96891192 0.9742268
|
|
0.94444444 0.95897436 0.96954315 0.95876289]
|
|
|
|
mean value: 0.9646009142637201
|
|
|
|
key: test_recall
|
|
value: [0.68181818 0.63636364 0.72727273 0.63636364 0.90909091 0.66666667
|
|
0.9047619 0.85714286 0.71428571 0.85714286]
|
|
|
|
mean value: 0.759090909090909
|
|
|
|
key: train_recall
|
|
value: [0.96891192 0.97409326 0.95854922 0.97927461 0.96891192 0.9742268
|
|
0.96391753 0.96391753 0.98453608 0.95876289]
|
|
|
|
mean value: 0.9695101757384755
|
|
|
|
key: test_roc_auc
|
|
value: [0.76948052 0.6038961 0.76839827 0.72294372 0.90692641 0.67424242
|
|
0.90692641 0.72402597 0.67532468 0.88311688]
|
|
|
|
mean value: 0.7635281385281385
|
|
|
|
key: train_roc_auc
|
|
value: [0.96126008 0.9690054 0.96381069 0.98190535 0.96899204 0.97416003
|
|
0.95346135 0.96123337 0.976724 0.95865605]
|
|
|
|
mean value: 0.966920837562096
|
|
|
|
key: test_jcc
|
|
value: [0.6 0.4516129 0.61538462 0.53846154 0.83333333 0.5
|
|
0.82608696 0.6 0.51724138 0.7826087 ]
|
|
|
|
mean value: 0.6264729421889551
|
|
|
|
key: train_jcc
|
|
value: [0.92574257 0.94 0.92964824 0.96428571 0.93969849 0.94974874
|
|
0.91219512 0.92574257 0.955 0.92079208]
|
|
|
|
mean value: 0.9362853541346641
|
|
|
|
MCC on Blind test: 0.41
|
|
|
|
Accuracy on Blind test: 0.7
|
|
|
|
Model_name: Bagging Classifier
|
|
Model func: BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
BaggingClassifier(n_jobs=10, oob_score=True,
|
|
random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.06695819 0.07240248 0.07764935 0.07833457 0.09272051 0.08303666
|
|
0.08197641 0.06399465 0.06653547 0.09358835]
|
|
|
|
mean value: 0.07771966457366944
|
|
|
|
key: score_time
|
|
value: [0.01832271 0.02029538 0.02448392 0.03695583 0.02700496 0.02261472
|
|
0.02670813 0.02114224 0.02746582 0.0180223 ]
|
|
|
|
mean value: 0.024301600456237794
|
|
|
|
key: test_mcc
|
|
value: [0.61748053 0.3030303 0.68193178 0.64040632 0.81778934 0.44701207
|
|
0.81701092 0.67462198 0.48807056 0.69166471]
|
|
|
|
mean value: 0.6179018506275636
|
|
|
|
key: train_mcc
|
|
value: [0.96919751 0.96414361 0.95885876 0.95865605 0.96445208 0.97938089
|
|
0.94878037 0.95380961 0.95870837 0.95452645]
|
|
|
|
mean value: 0.9610513714125107
|
|
|
|
key: test_accuracy
|
|
value: [0.79069767 0.65116279 0.8372093 0.81395349 0.90697674 0.72093023
|
|
0.90697674 0.8372093 0.74418605 0.8372093 ]
|
|
|
|
mean value: 0.8046511627906977
|
|
|
|
key: train_accuracy
|
|
value: [0.98449612 0.98191214 0.97932817 0.97932817 0.98191214 0.98966408
|
|
0.97416021 0.97674419 0.97932817 0.97674419]
|
|
|
|
mean value: 0.9803617571059432
|
|
|
|
key: test_fscore
|
|
value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
[0.75675676 0.65116279 0.82926829 0.8 0.9047619 0.68421053
|
|
0.9 0.82926829 0.73170732 0.81081081]
|
|
|
|
mean value: 0.7897946691781961
|
|
|
|
key: train_fscore
|
|
value: [0.98429319 0.9816273 0.97905759 0.97927461 0.98153034 0.98963731
|
|
0.97382199 0.97650131 0.97927461 0.9762533 ]
|
|
|
|
mean value: 0.9801271546598425
|
|
|
|
key: test_precision
|
|
value: [0.93333333 0.66666667 0.89473684 0.88888889 0.95 0.76470588
|
|
0.94736842 0.85 0.75 0.9375 ]
|
|
|
|
mean value: 0.8583200034399725
|
|
|
|
key: train_precision
|
|
value: [0.99470899 0.99468085 0.98941799 0.97927461 1. 0.99479167
|
|
0.9893617 0.98941799 0.984375 1. ]
|
|
|
|
mean value: 0.9916028804802093
|
|
|
|
key: test_recall
|
|
value: [0.63636364 0.63636364 0.77272727 0.72727273 0.86363636 0.61904762
|
|
0.85714286 0.80952381 0.71428571 0.71428571]
|
|
|
|
mean value: 0.7350649350649351
|
|
|
|
key: train_recall
|
|
value: [0.97409326 0.96891192 0.96891192 0.97927461 0.96373057 0.98453608
|
|
0.95876289 0.96391753 0.9742268 0.95360825]
|
|
|
|
mean value: 0.9689973826184499
|
|
|
|
key: test_roc_auc
|
|
value: [0.79437229 0.65151515 0.83874459 0.81601732 0.90800866 0.71861472
|
|
0.90584416 0.83658009 0.74350649 0.83441558]
|
|
|
|
mean value: 0.8047619047619048
|
|
|
|
key: train_roc_auc
|
|
value: [0.98446931 0.98187864 0.97930132 0.97932803 0.98186528 0.98967737
|
|
0.9742001 0.97677742 0.97934138 0.97680412]
|
|
|
|
mean value: 0.9803642967790182
|
|
|
|
key: test_jcc
|
|
value: [0.60869565 0.48275862 0.70833333 0.66666667 0.82608696 0.52
|
|
0.81818182 0.70833333 0.57692308 0.68181818]
|
|
|
|
mean value: 0.6597797639641718
|
|
|
|
key: train_jcc
|
|
value: [0.96907216 0.96391753 0.95897436 0.95939086 0.96373057 0.97948718
|
|
0.94897959 0.95408163 0.95939086 0.95360825]
|
|
|
|
mean value: 0.9610632996932176
|
|
|
|
MCC on Blind test: 0.28
|
|
|
|
Accuracy on Blind test: 0.64
|
|
|
|
Model_name: Gaussian Process
|
|
Model func: GaussianProcessClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianProcessClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.07039952 0.12239099 0.19194889 0.21874523 0.19765306 0.18567944
|
|
0.16427875 0.17072296 0.14521766 0.16551971]
|
|
|
|
mean value: 0.16325562000274657
|
|
|
|
key: score_time
|
|
value: [0.0144248 0.03488755 0.02736664 0.02442789 0.02979183 0.02604699
|
|
0.02822995 0.02328968 0.02602267 0.02613354]
|
|
|
|
mean value: 0.026062154769897462
|
|
|
|
key: test_mcc
|
|
value: [0.21351219 0.26106714 0.40088002 0.49456394 0.53796222 0.02380952
|
|
0.3071961 0.25541126 0.11688312 0.39696419]
|
|
|
|
mean value: 0.3008249695754552
|
|
|
|
key: train_mcc
|
|
value: [0.97427611 0.97427611 0.97427611 0.97937979 0.97427611 0.97932803
|
|
0.97417339 0.97938089 0.96904463 0.97427816]
|
|
|
|
mean value: 0.9752689337126527
|
|
|
|
key: test_accuracy
|
|
value: [0.60465116 0.62790698 0.69767442 0.74418605 0.76744186 0.51162791
|
|
0.65116279 0.62790698 0.55813953 0.69767442]
|
|
|
|
mean value: 0.6488372093023256
|
|
|
|
key: train_accuracy
|
|
value: [0.9870801 0.9870801 0.9870801 0.98966408 0.9870801 0.98966408
|
|
0.9870801 0.98966408 0.98449612 0.9870801 ]
|
|
|
|
mean value: 0.9875968992248062
|
|
|
|
key: test_fscore
|
|
value: [0.58536585 0.68 0.68292683 0.73170732 0.76190476 0.51162791
|
|
0.66666667 0.61904762 0.55813953 0.66666667]
|
|
|
|
mean value: 0.6464053156146179
|
|
|
|
key: train_fscore
|
|
value: [0.98694517 0.98694517 0.98694517 0.98958333 0.98694517 0.98969072
|
|
0.9870801 0.98963731 0.98445596 0.98701299]
|
|
|
|
mean value: 0.9875241088454858
|
|
|
|
key: test_precision
|
|
value: [0.63157895 0.60714286 0.73684211 0.78947368 0.8 0.5
|
|
0.625 0.61904762 0.54545455 0.72222222]
|
|
|
|
mean value: 0.6576761980709349
|
|
|
|
key: train_precision
|
|
value: [0.99473684 0.99473684 0.99473684 0.9947644 0.99473684 0.98969072
|
|
0.98963731 0.99479167 0.98958333 0.9947644 ]
|
|
|
|
mean value: 0.9932179191581537
|
|
|
|
key: test_recall
|
|
value: [0.54545455 0.77272727 0.63636364 0.68181818 0.72727273 0.52380952
|
|
0.71428571 0.61904762 0.57142857 0.61904762]
|
|
|
|
mean value: 0.6411255411255411
|
|
|
|
key: train_recall
|
|
value: [0.97927461 0.97927461 0.97927461 0.98445596 0.97927461 0.98969072
|
|
0.98453608 0.98453608 0.97938144 0.97938144]
|
|
|
|
mean value: 0.9819080177340954
|
|
|
|
key: test_roc_auc
|
|
value: [0.60606061 0.62445887 0.6991342 0.745671 0.76839827 0.51190476
|
|
0.6525974 0.62770563 0.55844156 0.69588745]
|
|
|
|
mean value: 0.6490259740259741
|
|
|
|
key: train_roc_auc
|
|
value: [0.98705999 0.98705999 0.98705999 0.98965066 0.98705999 0.98966401
|
|
0.98708669 0.98967737 0.98450937 0.98710005]
|
|
|
|
mean value: 0.9875928102131296
|
|
|
|
key: test_jcc
|
|
value: [0.4137931 0.51515152 0.51851852 0.57692308 0.61538462 0.34375
|
|
0.5 0.44827586 0.38709677 0.5 ]
|
|
|
|
mean value: 0.4818893465688516
|
|
|
|
key: train_jcc
|
|
value: [0.9742268 0.9742268 0.9742268 0.97938144 0.9742268 0.97959184
|
|
0.9744898 0.97948718 0.96938776 0.97435897]
|
|
|
|
mean value: 0.975360420139507
|
|
|
|
MCC on Blind test: 0.27
|
|
|
|
Accuracy on Blind test: 0.64
|
|
|
|
Model_name: Gradient Boosting
|
|
Model func: GradientBoostingClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GradientBoostingClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.65034485 0.64601135 0.64165354 0.64062381 0.63947058 0.6445632
|
|
0.64104247 0.64255095 0.63853455 0.63667583]
|
|
|
|
mean value: 0.6421471118927002
|
|
|
|
key: score_time
|
|
value: [0.00956774 0.00966597 0.00943089 0.009624 0.00955415 0.00924993
|
|
0.00980234 0.00949836 0.00945091 0.00953102]
|
|
|
|
mean value: 0.009537529945373536
|
|
|
|
key: test_mcc
|
|
value: [0.63123793 0.25490741 0.86929961 0.64040632 0.81701092 0.44155844
|
|
0.90692641 0.72451364 0.4912706 0.77418983]
|
|
|
|
mean value: 0.6551321088445219
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.81395349 0.62790698 0.93023256 0.81395349 0.90697674 0.72093023
|
|
0.95348837 0.86046512 0.74418605 0.88372093]
|
|
|
|
mean value: 0.8255813953488372
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.80952381 0.65217391 0.92682927 0.8 0.91304348 0.71428571
|
|
0.95238095 0.86363636 0.71794872 0.87179487]
|
|
|
|
mean value: 0.822161708916746
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.85 0.625 1. 0.88888889 0.875 0.71428571
|
|
0.95238095 0.82608696 0.77777778 0.94444444]
|
|
|
|
mean value: 0.8453864734299517
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.77272727 0.68181818 0.86363636 0.72727273 0.95454545 0.71428571
|
|
0.95238095 0.9047619 0.66666667 0.80952381]
|
|
|
|
mean value: 0.8047619047619048
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.81493506 0.62662338 0.93181818 0.81601732 0.90584416 0.72077922
|
|
0.9534632 0.86147186 0.74242424 0.88203463]
|
|
|
|
mean value: 0.8255411255411256
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.68 0.48387097 0.86363636 0.66666667 0.84 0.55555556
|
|
0.90909091 0.76 0.56 0.77272727]
|
|
|
|
mean value: 0.7091547735418703
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.54
|
|
|
|
Accuracy on Blind test: 0.76
|
|
|
|
Model_name: QDA
|
|
Model func: QuadraticDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', QuadraticDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.04376793 0.03075194 0.03130722 0.03120637 0.03110909 0.03160024
|
|
0.03136516 0.03184676 0.03131175 0.03312492]
|
|
|
|
mean value: 0.03273913860321045
|
|
|
|
key: score_time
|
|
value: [0.0127399 0.01823902 0.0177846 0.01779556 0.01547074 0.01559162
|
|
0.01546383 0.0193727 0.01602268 0.02370071]
|
|
|
|
mean value: 0.017218136787414552
|
|
|
|
key: test_mcc
|
|
value: [0.44227524 0.16762131 0.16154396 0.16726499 0.57247033 0.21351219
|
|
0.51986413 0.46619277 0.31757311 0.4517935 ]
|
|
|
|
mean value: 0.3480111526255688
|
|
|
|
key: train_mcc
|
|
value: [0.62455205 0.68487412 0.60464608 0.56891602 0.55700705 0.79136899
|
|
0.6761983 0.69660603 0.66807973 0.57186378]
|
|
|
|
mean value: 0.6444112162684469
|
|
|
|
key: test_accuracy
|
|
value: [0.72093023 0.55813953 0.58139535 0.58139535 0.76744186 0.60465116
|
|
0.74418605 0.69767442 0.62790698 0.72093023]
|
|
|
|
mean value: 0.6604651162790698
|
|
|
|
key: train_accuracy
|
|
value: [0.78036176 0.81912145 0.76744186 0.74418605 0.73643411 0.88630491
|
|
0.81395349 0.82687339 0.80878553 0.74677003]
|
|
|
|
mean value: 0.7930232558139535
|
|
|
|
key: test_fscore
|
|
value: [0.73913043 0.68852459 0.625 0.65384615 0.80769231 0.62222222
|
|
0.7755102 0.75471698 0.7037037 0.73913043]
|
|
|
|
mean value: 0.7109477032407248
|
|
|
|
key: train_fscore
|
|
value: [0.81953291 0.84649123 0.81092437 0.79587629 0.79098361 0.89767442
|
|
0.84347826 0.85274725 0.83982684 0.79835391]
|
|
|
|
mean value: 0.8295889083253458
|
|
|
|
key: test_precision
|
|
value: [0.70833333 0.53846154 0.57692308 0.56666667 0.7 0.58333333
|
|
0.67857143 0.625 0.57575758 0.68 ]
|
|
|
|
mean value: 0.6233046953046953
|
|
|
|
key: train_precision
|
|
value: [0.6942446 0.7338403 0.6819788 0.6609589 0.65423729 0.81779661
|
|
0.72932331 0.74329502 0.7238806 0.66438356]
|
|
|
|
mean value: 0.7103938995586828
|
|
|
|
key: test_recall
|
|
value: [0.77272727 0.95454545 0.68181818 0.77272727 0.95454545 0.66666667
|
|
0.9047619 0.95238095 0.9047619 0.80952381]
|
|
|
|
mean value: 0.8374458874458874
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 0.99484536
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9994845360824742
|
|
|
|
key: test_roc_auc
|
|
value: [0.71969697 0.5487013 0.57900433 0.57683983 0.76298701 0.60606061
|
|
0.7478355 0.7034632 0.63419913 0.72294372]
|
|
|
|
mean value: 0.6601731601731602
|
|
|
|
key: train_roc_auc
|
|
value: [0.78092784 0.81958763 0.76804124 0.74484536 0.7371134 0.88602372
|
|
0.8134715 0.82642487 0.80829016 0.74611399]
|
|
|
|
mean value: 0.7930839698734042
|
|
|
|
key: test_jcc
|
|
value: [0.5862069 0.525 0.45454545 0.48571429 0.67741935 0.4516129
|
|
0.63333333 0.60606061 0.54285714 0.5862069 ]
|
|
|
|
mean value: 0.5548956873678786
|
|
|
|
key: train_jcc
|
|
value: [0.6942446 0.7338403 0.6819788 0.6609589 0.65423729 0.81434599
|
|
0.72932331 0.74329502 0.7238806 0.66438356]
|
|
|
|
mean value: 0.7100488376978518
|
|
|
|
MCC on Blind test: 0.05
|
|
|
|
Accuracy on Blind test: 0.55
|
|
|
|
Model_name: Ridge Classifier
|
|
Model func: RidgeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02356768 0.03810334 0.03743887 0.04197311 0.06063938 0.04091549
|
|
0.03609419 0.04485106 0.06518602 0.05394864]
|
|
|
|
mean value: 0.04427177906036377
|
|
|
|
key: score_time
|
|
value: [0.02559161 0.03329444 0.0238359 0.02330065 0.01614666 0.02104402
|
|
0.0379591 0.01286244 0.02467752 0.04604602]
|
|
|
|
mean value: 0.02647583484649658
|
|
|
|
key: test_mcc
|
|
value: [0.4517935 0.2567 0.54609991 0.67532468 0.62964308 0.25490741
|
|
0.81701092 0.59970431 0.31423621 0.58557701]
|
|
|
|
mean value: 0.513099699901425
|
|
|
|
key: train_mcc
|
|
value: [0.77265565 0.77859243 0.75711768 0.74723387 0.77265565 0.79332817
|
|
0.76745366 0.74686824 0.77787869 0.74174506]
|
|
|
|
mean value: 0.7655529075706143
|
|
|
|
key: test_accuracy
|
|
value: [0.72093023 0.62790698 0.76744186 0.8372093 0.81395349 0.62790698
|
|
0.90697674 0.79069767 0.65116279 0.79069767]
|
|
|
|
mean value: 0.7534883720930232
|
|
|
|
key: train_accuracy
|
|
value: [0.88630491 0.88888889 0.87855297 0.87338501 0.88630491 0.89664083
|
|
0.88372093 0.87338501 0.88888889 0.87080103]
|
|
|
|
mean value: 0.882687338501292
|
|
|
|
key: test_fscore
|
|
value: [0.7 0.66666667 0.75 0.8372093 0.82608696 0.6
|
|
0.9 0.80851064 0.68085106 0.76923077]
|
|
|
|
mean value: 0.7538555396872416
|
|
|
|
key: train_fscore
|
|
value: /home/tanu/git/LSHTM_analysis/scripts/ml/./pnca_cd_sl.py:136: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
smnc_CT.sort_values(by = ['test_mcc'], ascending = False, inplace = True)
|
|
/home/tanu/git/LSHTM_analysis/scripts/ml/./pnca_cd_sl.py:139: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
smnc_BT.sort_values(by = ['bts_mcc'], ascending = False, inplace = True)
|
|
[0.88659794 0.88594164 0.87855297 0.8707124 0.88659794 0.89637306
|
|
0.88372093 0.87272727 0.88831169 0.87244898]
|
|
|
|
mean value: 0.8821984821340805
|
|
|
|
key: test_precision
|
|
value: [0.77777778 0.61538462 0.83333333 0.85714286 0.79166667 0.63157895
|
|
0.94736842 0.73076923 0.61538462 0.83333333]
|
|
|
|
mean value: 0.7633739798213482
|
|
|
|
key: train_precision
|
|
value: [0.88205128 0.9076087 0.87628866 0.88709677 0.88205128 0.90104167
|
|
0.88601036 0.87958115 0.89528796 0.86363636]
|
|
|
|
mean value: 0.8860654196687076
|
|
|
|
key: test_recall
|
|
value: [0.63636364 0.72727273 0.68181818 0.81818182 0.86363636 0.57142857
|
|
0.85714286 0.9047619 0.76190476 0.71428571]
|
|
|
|
mean value: 0.7536796536796537
|
|
|
|
key: train_recall
|
|
value: [0.89119171 0.86528497 0.88082902 0.85492228 0.89119171 0.89175258
|
|
0.8814433 0.86597938 0.8814433 0.8814433 ]
|
|
|
|
mean value: 0.8785481544789274
|
|
|
|
key: test_roc_auc
|
|
value: [0.72294372 0.62554113 0.76948052 0.83766234 0.81277056 0.62662338
|
|
0.90584416 0.79329004 0.65367965 0.78896104]
|
|
|
|
mean value: 0.7536796536796537
|
|
|
|
key: train_roc_auc
|
|
value: [0.8863175 0.88882805 0.87855884 0.87333743 0.8863175 0.89665349
|
|
0.88372683 0.8734042 0.88890818 0.87077346]
|
|
|
|
mean value: 0.8826825490091341
|
|
|
|
key: test_jcc
|
|
value: [0.53846154 0.5 0.6 0.72 0.7037037 0.42857143
|
|
0.81818182 0.67857143 0.51612903 0.625 ]
|
|
|
|
mean value: 0.6128618949747981
|
|
|
|
key: train_jcc
|
|
value: [0.7962963 0.7952381 0.78341014 0.77102804 0.7962963 0.81220657
|
|
0.79166667 0.77419355 0.79906542 0.77375566]
|
|
|
|
mean value: 0.7893156727955775
|
|
|
|
MCC on Blind test: 0.45
|
|
|
|
Accuracy on Blind test: 0.73
|
|
|
|
Model_name: Ridge ClassifierCV
|
|
Model func: RidgeClassifierCV(cv=10)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifierCV(cv=10))])
|
|
|
|
key: fit_time
|
|
value: [0.33694553 0.3823595 0.36175776 0.33384323 0.39316416 0.4307847
|
|
0.36658239 0.29315281 0.39274859 0.34651494]
|
|
|
|
mean value: 0.3637853622436523
|
|
|
|
key: score_time
|
|
value: [0.0161376 0.01644969 0.02295113 0.02281094 0.02321029 0.02241588
|
|
0.01558924 0.02218533 0.02343941 0.02282286]
|
|
|
|
mean value: 0.020801234245300292
|
|
|
|
key: test_mcc
|
|
value: [0.49456394 0.2567 0.54609991 0.72451364 0.67462198 0.25490741
|
|
0.81701092 0.68193178 0.12392414 0.58557701]
|
|
|
|
mean value: 0.515985070255154
|
|
|
|
key: train_mcc
|
|
value: [0.70549739 0.77859243 0.75711768 0.68994899 0.70542988 0.79332817
|
|
0.65891245 0.65375781 0.71577373 0.68992041]
|
|
|
|
mean value: 0.7148278931967736
|
|
|
|
key: test_accuracy
|
|
value: [0.74418605 0.62790698 0.76744186 0.86046512 0.8372093 0.62790698
|
|
0.90697674 0.8372093 0.55813953 0.79069767]
|
|
|
|
mean value: 0.7558139534883721
|
|
|
|
key: train_accuracy
|
|
value: [0.85271318 0.88888889 0.87855297 0.84496124 0.85271318 0.89664083
|
|
0.82945736 0.82687339 0.85788114 0.84496124]
|
|
|
|
mean value: 0.8573643410852713
|
|
|
|
key: test_fscore
|
|
value: [0.73170732 0.66666667 0.75 0.85714286 0.84444444 0.6
|
|
0.9 0.84444444 0.59574468 0.76923077]
|
|
|
|
mean value: 0.7559381179853416
|
|
|
|
key: train_fscore
|
|
value: [0.85117493 0.88594164 0.87855297 0.84375 0.85194805 0.89637306
|
|
0.82989691 0.82687339 0.85788114 0.84536082]
|
|
|
|
mean value: 0.8567752913729867
|
|
|
|
key: test_precision
|
|
value: [0.78947368 0.61538462 0.83333333 0.9 0.82608696 0.63157895
|
|
0.94736842 0.79166667 0.53846154 0.83333333]
|
|
|
|
mean value: 0.7706687496332805
|
|
|
|
key: train_precision
|
|
value: [0.85789474 0.9076087 0.87628866 0.84816754 0.85416667 0.90104167
|
|
0.82989691 0.82901554 0.86010363 0.84536082]
|
|
|
|
mean value: 0.8609544867831661
|
|
|
|
key: test_recall
|
|
value: [0.68181818 0.72727273 0.68181818 0.81818182 0.86363636 0.57142857
|
|
0.85714286 0.9047619 0.66666667 0.71428571]
|
|
|
|
mean value: 0.7487012987012986
|
|
|
|
key: train_recall
|
|
value: [0.84455959 0.86528497 0.88082902 0.83937824 0.84974093 0.89175258
|
|
0.82989691 0.82474227 0.8556701 0.84536082]
|
|
|
|
mean value: 0.8527215426526361
|
|
|
|
key: test_roc_auc
|
|
value: [0.745671 0.62554113 0.76948052 0.86147186 0.83658009 0.62662338
|
|
0.90584416 0.83874459 0.56060606 0.78896104]
|
|
|
|
mean value: 0.7559523809523809
|
|
|
|
key: train_roc_auc
|
|
value: [0.85269216 0.88882805 0.87855884 0.84494685 0.85270552 0.89665349
|
|
0.82945623 0.82687891 0.85788687 0.84496021]
|
|
|
|
mean value: 0.8573567117141179
|
|
|
|
key: test_jcc
|
|
value: [0.57692308 0.5 0.6 0.75 0.73076923 0.42857143
|
|
0.81818182 0.73076923 0.42424242 0.625 ]
|
|
|
|
mean value: 0.6184457209457209
|
|
|
|
key: train_jcc
|
|
value: [0.74090909 0.7952381 0.78341014 0.72972973 0.74208145 0.81220657
|
|
0.7092511 0.70484581 0.75113122 0.73214286]
|
|
|
|
mean value: 0.7500946070021391
|
|
|
|
MCC on Blind test: 0.45
|
|
|
|
Accuracy on Blind test: 0.73
|
|
|
|
Model_name: Logistic Regression
|
|
Model func: LogisticRegression(random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegression(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.05380988 0.07341051 0.05587864 0.05535698 0.04124856 0.03699017
|
|
0.03655267 0.05492473 0.03699756 0.0496068 ]
|
|
|
|
mean value: 0.049477648735046384
|
|
|
|
key: score_time
|
|
value: [0.01278281 0.01413512 0.0141964 0.01429844 0.01432276 0.01433253
|
|
0.01438165 0.01441407 0.01473069 0.01502562]
|
|
|
|
mean value: 0.014262008666992187
|
|
|
|
key: test_mcc
|
|
value: [0.40088002 0.44227524 0.44155844 0.67532468 0.62770563 0.39479486
|
|
0.81385281 0.48026322 0.35748709 0.69166471]
|
|
|
|
mean value: 0.5325806698493886
|
|
|
|
key: train_mcc
|
|
value: [0.71576614 0.73651032 0.71062262 0.7002564 0.71063807 0.73651032
|
|
0.69510176 0.71062262 0.71059238 0.69515975]
|
|
|
|
mean value: 0.7121780365755461
|
|
|
|
key: test_accuracy
|
|
value: [0.69767442 0.72093023 0.72093023 0.8372093 0.81395349 0.69767442
|
|
0.90697674 0.72093023 0.6744186 0.8372093 ]
|
|
|
|
mean value: 0.7627906976744185
|
|
|
|
key: train_accuracy
|
|
value: [0.85788114 0.86821705 0.85529716 0.8501292 0.85529716 0.86821705
|
|
0.84754522 0.85529716 0.85529716 0.84754522]
|
|
|
|
mean value: 0.8560723514211886
|
|
|
|
key: test_fscore
|
|
value: [0.68292683 0.73913043 0.72727273 0.8372093 0.81818182 0.68292683
|
|
0.9047619 0.76 0.69565217 0.81081081]
|
|
|
|
mean value: 0.765887283058508
|
|
|
|
key: train_fscore
|
|
value: [0.85714286 0.86684073 0.85416667 0.84974093 0.8556701 0.86956522
|
|
0.84754522 0.85641026 0.8556701 0.84910486]
|
|
|
|
mean value: 0.8561856946482916
|
|
|
|
key: test_precision
|
|
value: [0.73684211 0.70833333 0.72727273 0.85714286 0.81818182 0.7
|
|
0.9047619 0.65517241 0.64 0.9375 ]
|
|
|
|
mean value: 0.7685207159748902
|
|
|
|
key: train_precision
|
|
value: [0.859375 0.87368421 0.85863874 0.84974093 0.85128205 0.86294416
|
|
0.84974093 0.85204082 0.8556701 0.84263959]
|
|
|
|
mean value: 0.855575654631333
|
|
|
|
key: test_recall
|
|
value: [0.63636364 0.77272727 0.72727273 0.81818182 0.81818182 0.66666667
|
|
0.9047619 0.9047619 0.76190476 0.71428571]
|
|
|
|
mean value: 0.7725108225108225
|
|
|
|
key: train_recall
|
|
value: [0.85492228 0.86010363 0.84974093 0.84974093 0.86010363 0.87628866
|
|
0.84536082 0.86082474 0.8556701 0.8556701 ]
|
|
|
|
mean value: 0.8568425831953421
|
|
|
|
key: test_roc_auc
|
|
value: [0.6991342 0.71969697 0.72077922 0.83766234 0.81385281 0.6969697
|
|
0.90692641 0.72510823 0.67640693 0.83441558]
|
|
|
|
mean value: 0.763095238095238
|
|
|
|
key: train_roc_auc
|
|
value: [0.85787351 0.86819614 0.85528284 0.8501282 0.85530955 0.86819614
|
|
0.84755088 0.85528284 0.85529619 0.84752417]
|
|
|
|
mean value: 0.8560640457240531
|
|
|
|
key: test_jcc
|
|
value: [0.51851852 0.5862069 0.57142857 0.72 0.69230769 0.51851852
|
|
0.82608696 0.61290323 0.53333333 0.68181818]
|
|
|
|
mean value: 0.6261121894804731
|
|
|
|
key: train_jcc
|
|
value: [0.75 0.76497696 0.74545455 0.73873874 0.74774775 0.76923077
|
|
0.73542601 0.74887892 0.74774775 0.73777778]
|
|
|
|
mean value: 0.7485979217958099
|
|
|
|
MCC on Blind test: 0.39
|
|
|
|
Accuracy on Blind test: 0.7
|
|
|
|
Model_name: Logistic RegressionCV
|
|
Model func: LogisticRegressionCV(random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegressionCV(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.96443534 1.14559245 0.9931128 1.13871431 1.37451053 1.55276895
|
|
1.93655849 1.40987992 1.65614939 2.07050133]
|
|
|
|
mean value: 1.4242223501205444
|
|
|
|
key: score_time
|
|
value: [0.01451755 0.019454 0.01595926 0.01519442 0.0151484 0.01562023
|
|
0.01477695 0.01309204 0.02022529 0.01571345]
|
|
|
|
mean value: 0.01597015857696533
|
|
|
|
key: test_mcc
|
|
value: [0.44468651 0.25490741 0.49456394 0.63123793 0.62770563 0.25490741
|
|
0.81385281 0.57954841 0.27084605 0.69166471]
|
|
|
|
mean value: 0.5063920803567467
|
|
|
|
key: train_mcc
|
|
value: [0.76744745 0.74163306 0.81435868 0.75711119 0.74703465 0.86568201
|
|
0.73643866 0.65380918 0.76230669 0.74163306]
|
|
|
|
mean value: 0.7587454624519377
|
|
|
|
key: test_accuracy
|
|
value: [0.72093023 0.62790698 0.74418605 0.81395349 0.81395349 0.62790698
|
|
0.90697674 0.76744186 0.62790698 0.8372093 ]
|
|
|
|
mean value: 0.7488372093023256
|
|
|
|
key: train_accuracy
|
|
value: [0.88372093 0.87080103 0.90697674 0.87855297 0.87338501 0.93281654
|
|
0.86821705 0.82687339 0.88113695 0.87080103]
|
|
|
|
mean value: 0.879328165374677
|
|
|
|
key: test_fscore
|
|
value: [0.71428571 0.65217391 0.73170732 0.80952381 0.81818182 0.6
|
|
0.9047619 0.8 0.66666667 0.81081081]
|
|
|
|
mean value: 0.7508111954347373
|
|
|
|
key: train_fscore
|
|
value: [0.88311688 0.86979167 0.90816327 0.87792208 0.87468031 0.93264249
|
|
0.8688946 0.8286445 0.88205128 0.87179487]
|
|
|
|
mean value: 0.8797701943631095
|
|
|
|
key: test_precision
|
|
value: [0.75 0.625 0.78947368 0.85 0.81818182 0.63157895
|
|
0.9047619 0.68965517 0.59259259 0.9375 ]
|
|
|
|
mean value: 0.7588744119529056
|
|
|
|
key: train_precision
|
|
value: [0.88541667 0.87434555 0.89447236 0.88020833 0.86363636 0.9375
|
|
0.86666667 0.82233503 0.87755102 0.86734694]
|
|
|
|
mean value: 0.876947892641468
|
|
|
|
key: test_recall
|
|
value: [0.68181818 0.68181818 0.68181818 0.77272727 0.81818182 0.57142857
|
|
0.9047619 0.95238095 0.76190476 0.71428571]
|
|
|
|
mean value: 0.7541125541125541
|
|
|
|
key: train_recall
|
|
value: [0.88082902 0.86528497 0.92227979 0.87564767 0.88601036 0.92783505
|
|
0.87113402 0.83505155 0.88659794 0.87628866]
|
|
|
|
mean value: 0.8826959029966348
|
|
|
|
key: test_roc_auc
|
|
value: [0.72186147 0.62662338 0.745671 0.81493506 0.81385281 0.62662338
|
|
0.90692641 0.77164502 0.63095238 0.83441558]
|
|
|
|
mean value: 0.7493506493506493
|
|
|
|
key: train_roc_auc
|
|
value: [0.88371348 0.87078682 0.90701619 0.87854548 0.87341755 0.93282944
|
|
0.8682095 0.8268522 0.8811228 0.87078682]
|
|
|
|
mean value: 0.8793280273489664
|
|
|
|
key: test_jcc
|
|
value: [0.55555556 0.48387097 0.57692308 0.68 0.69230769 0.42857143
|
|
0.82608696 0.66666667 0.5 0.68181818]
|
|
|
|
mean value: 0.6091800526106277
|
|
|
|
key: train_jcc
|
|
value: [0.79069767 0.76958525 0.8317757 0.78240741 0.77727273 0.87378641
|
|
0.76818182 0.70742358 0.78899083 0.77272727]
|
|
|
|
mean value: 0.786284866863972
|
|
|
|
MCC on Blind test: 0.45
|
|
|
|
Accuracy on Blind test: 0.73
|
|
|
|
Model_name: Gaussian NB
|
|
Model func: GaussianNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianNB())])
|
|
|
|
key: fit_time
|
|
value: [0.02845478 0.01532006 0.01588511 0.01669621 0.01684451 0.01621389
|
|
0.01678681 0.01132989 0.01192236 0.01090431]
|
|
|
|
mean value: 0.016035795211791992
|
|
|
|
key: score_time
|
|
value: [0.02339506 0.01329732 0.01424932 0.01453495 0.01454854 0.01461339
|
|
0.01037192 0.01000857 0.0116744 0.00963879]
|
|
|
|
mean value: 0.013633227348327637
|
|
|
|
key: test_mcc
|
|
value: [ 0.44701207 -0.03967598 0.39696419 0.34859132 0.36986766 0.3071961
|
|
0.67883359 0.17877574 0.42224772 0.11982827]
|
|
|
|
mean value: 0.32296406808134626
|
|
|
|
key: train_mcc
|
|
value: [0.37165326 0.40315208 0.39862039 0.38778838 0.37968295 0.41896574
|
|
0.37598756 0.43916108 0.41539874 0.46520935]
|
|
|
|
mean value: 0.40556195340462464
|
|
|
|
key: test_accuracy
|
|
value: [0.72093023 0.48837209 0.69767442 0.6744186 0.6744186 0.65116279
|
|
0.81395349 0.55813953 0.69767442 0.55813953]
|
|
|
|
mean value: 0.6534883720930232
|
|
|
|
key: train_accuracy
|
|
value: [0.6744186 0.69509044 0.68992248 0.67183463 0.67958656 0.70284238
|
|
0.67958656 0.71317829 0.7002584 0.72609819]
|
|
|
|
mean value: 0.6932816537467701
|
|
|
|
key: test_fscore
|
|
value: [0.75 0.59259259 0.72340426 0.69565217 0.73076923 0.66666667
|
|
0.84 0.66666667 0.73469388 0.57777778]
|
|
|
|
mean value: 0.6978223241256147
|
|
|
|
key: train_fscore
|
|
value: [0.72123894 0.7281106 0.72972973 0.73263158 0.72321429 0.73684211
|
|
0.72197309 0.74482759 0.73636364 0.75576037]
|
|
|
|
mean value: 0.7330691922190511
|
|
|
|
key: test_precision
|
|
value: [0.69230769 0.5 0.68 0.66666667 0.63333333 0.625
|
|
0.72413793 0.52777778 0.64285714 0.54166667]
|
|
|
|
mean value: 0.6233747210643762
|
|
|
|
key: train_precision
|
|
value: [0.62934363 0.65560166 0.64541833 0.61702128 0.63529412 0.66255144
|
|
0.63888889 0.67219917 0.65853659 0.68333333]
|
|
|
|
mean value: 0.6498188428072472
|
|
|
|
key: test_recall
|
|
value: [0.81818182 0.72727273 0.77272727 0.72727273 0.86363636 0.71428571
|
|
1. 0.9047619 0.85714286 0.61904762]
|
|
|
|
mean value: 0.8004329004329005
|
|
|
|
key: train_recall
|
|
value: [0.84455959 0.81865285 0.83937824 0.9015544 0.83937824 0.82989691
|
|
0.82989691 0.83505155 0.83505155 0.84536082]
|
|
|
|
mean value: 0.8418781048020939
|
|
|
|
key: test_roc_auc
|
|
value: [0.71861472 0.48268398 0.69588745 0.67316017 0.66991342 0.6525974
|
|
0.81818182 0.56601732 0.7012987 0.55952381]
|
|
|
|
mean value: 0.6537878787878788
|
|
|
|
key: train_roc_auc
|
|
value: [0.67485711 0.6954089 0.69030768 0.67242669 0.6799984 0.70251322
|
|
0.67919716 0.71286256 0.69990919 0.72578922]
|
|
|
|
mean value: 0.6933270124459163
|
|
|
|
key: test_jcc
|
|
value: [0.6 0.42105263 0.56666667 0.53333333 0.57575758 0.5
|
|
0.72413793 0.5 0.58064516 0.40625 ]
|
|
|
|
mean value: 0.5407843299661328
|
|
|
|
key: train_jcc
|
|
value: [0.56401384 0.57246377 0.57446809 0.57807309 0.56643357 0.58333333
|
|
0.56491228 0.59340659 0.58273381 0.60740741]
|
|
|
|
mean value: 0.5787245777986066
|
|
|
|
MCC on Blind test: 0.32
|
|
|
|
Accuracy on Blind test: 0.67
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01109862 0.01014376 0.01011562 0.0104208 0.01032352 0.01022625
|
|
0.0103817 0.01012492 0.01034141 0.01021791]
|
|
|
|
mean value: 0.010339450836181641
|
|
|
|
key: score_time
|
|
value: [0.00949383 0.00930595 0.00907779 0.00954795 0.00929666 0.00967145
|
|
0.00931907 0.00926375 0.00916362 0.00910592]
|
|
|
|
mean value: 0.009324598312377929
|
|
|
|
key: test_mcc
|
|
value: [0.48917749 0.06753957 0.20995671 0.50454827 0.39696419 0.3030303
|
|
0.72451364 0.36709713 0.36709713 0.34859132]
|
|
|
|
mean value: 0.3778515749103558
|
|
|
|
key: train_mcc
|
|
value: [0.46826734 0.51459683 0.46884804 0.49038014 0.49958596 0.51988165
|
|
0.4373134 0.47321307 0.45779106 0.48339175]
|
|
|
|
mean value: 0.48132692433520785
|
|
|
|
key: test_accuracy
|
|
value: [0.74418605 0.53488372 0.60465116 0.74418605 0.69767442 0.65116279
|
|
0.86046512 0.6744186 0.6744186 0.6744186 ]
|
|
|
|
mean value: 0.686046511627907
|
|
|
|
key: train_accuracy
|
|
value: [0.73385013 0.75710594 0.73385013 0.74418605 0.74935401 0.75968992
|
|
0.71834625 0.73643411 0.72868217 0.74160207]
|
|
|
|
mean value: 0.7403100775193798
|
|
|
|
key: test_fscore
|
|
value: [0.74418605 0.56521739 0.60465116 0.71794872 0.72340426 0.65116279
|
|
0.86363636 0.70833333 0.70833333 0.65 ]
|
|
|
|
mean value: 0.6936873394875245
|
|
|
|
key: train_fscore
|
|
value: [0.73924051 0.75132275 0.74185464 0.75434243 0.75566751 0.76574307
|
|
0.72681704 0.74242424 0.73551637 0.74619289]
|
|
|
|
mean value: 0.7459121456577962
|
|
|
|
key: test_precision
|
|
value: [0.76190476 0.54166667 0.61904762 0.82352941 0.68 0.63636364
|
|
0.82608696 0.62962963 0.62962963 0.68421053]
|
|
|
|
mean value: 0.6832068837844177
|
|
|
|
key: train_precision
|
|
value: [0.72277228 0.76756757 0.7184466 0.72380952 0.73529412 0.74876847
|
|
0.70731707 0.72772277 0.71921182 0.735 ]
|
|
|
|
mean value: 0.7305910229208082
|
|
|
|
key: test_recall
|
|
value: [0.72727273 0.59090909 0.59090909 0.63636364 0.77272727 0.66666667
|
|
0.9047619 0.80952381 0.80952381 0.61904762]
|
|
|
|
mean value: 0.7127705627705627
|
|
|
|
key: train_recall
|
|
value: [0.75647668 0.7357513 0.76683938 0.78756477 0.77720207 0.78350515
|
|
0.74742268 0.75773196 0.75257732 0.75773196]
|
|
|
|
mean value: 0.762280326905614
|
|
|
|
key: test_roc_auc
|
|
value: [0.74458874 0.53354978 0.60497835 0.74675325 0.69588745 0.65151515
|
|
0.86147186 0.67748918 0.67748918 0.67316017]
|
|
|
|
mean value: 0.6866883116883117
|
|
|
|
key: train_roc_auc
|
|
value: [0.73390845 0.75705091 0.73393515 0.74429785 0.74942578 0.75962822
|
|
0.71827093 0.73637893 0.72862027 0.74156028]
|
|
|
|
mean value: 0.7403076758720154
|
|
|
|
key: test_jcc
|
|
value: [0.59259259 0.39393939 0.43333333 0.56 0.56666667 0.48275862
|
|
0.76 0.5483871 0.5483871 0.48148148]
|
|
|
|
mean value: 0.536754628225151
|
|
|
|
key: train_jcc
|
|
value: [0.58634538 0.60169492 0.58964143 0.60557769 0.60728745 0.62040816
|
|
0.57086614 0.59036145 0.58167331 0.5951417 ]
|
|
|
|
mean value: 0.5948997627637519
|
|
|
|
MCC on Blind test: 0.48
|
|
|
|
Accuracy on Blind test: 0.73
|
|
|
|
Model_name: K-Nearest Neighbors
|
|
Model func: KNeighborsClassifier()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', KNeighborsClassifier())])
|
|
|
|
key: fit_time
|
|
value: [0.00967765 0.01076436 0.0116446 0.01169658 0.0106988 0.01059484
|
|
0.0109911 0.01088476 0.01136351 0.01097918]
|
|
|
|
mean value: 0.010929536819458009
|
|
|
|
key: score_time
|
|
value: [0.01712894 0.0148201 0.01408362 0.01479387 0.01760268 0.01796579
|
|
0.01856637 0.0191288 0.01793575 0.01807547]
|
|
|
|
mean value: 0.017010140419006347
|
|
|
|
key: test_mcc
|
|
value: [ 0.21040933 0.06926407 0.2581351 0.48917749 0.49456394 -0.01790718
|
|
0.06926407 0.20995671 0.3071961 0.20835137]
|
|
|
|
mean value: 0.22984110024173177
|
|
|
|
key: train_mcc
|
|
value: [0.52028836 0.49354951 0.49874453 0.49401307 0.50391282 0.56729474
|
|
0.48475098 0.53489677 0.51938999 0.5093282 ]
|
|
|
|
mean value: 0.51261689832722
|
|
|
|
key: test_accuracy
|
|
value: [0.60465116 0.53488372 0.62790698 0.74418605 0.74418605 0.48837209
|
|
0.53488372 0.60465116 0.65116279 0.60465116]
|
|
|
|
mean value: 0.613953488372093
|
|
|
|
key: train_accuracy
|
|
value: [0.75968992 0.74677003 0.74935401 0.74677003 0.75193798 0.78294574
|
|
0.74160207 0.76744186 0.75968992 0.75452196]
|
|
|
|
mean value: 0.7560723514211887
|
|
|
|
key: test_fscore
|
|
value: [0.65306122 0.54545455 0.61904762 0.74418605 0.73170732 0.54166667
|
|
0.52380952 0.60465116 0.66666667 0.56410256]
|
|
|
|
mean value: 0.6194353336612878
|
|
|
|
key: train_fscore
|
|
value: [0.76574307 0.74479167 0.74673629 0.75126904 0.75257732 0.79104478
|
|
0.75247525 0.76923077 0.75968992 0.75949367]
|
|
|
|
mean value: 0.7593051773504969
|
|
|
|
key: test_precision
|
|
value: [0.59259259 0.54545455 0.65 0.76190476 0.78947368 0.48148148
|
|
0.52380952 0.59090909 0.625 0.61111111]
|
|
|
|
mean value: 0.6171736791473633
|
|
|
|
key: train_precision
|
|
value: [0.74509804 0.7486911 0.75263158 0.73631841 0.74871795 0.76442308
|
|
0.72380952 0.76530612 0.76165803 0.74626866]
|
|
|
|
mean value: 0.7492922485303724
|
|
|
|
key: test_recall
|
|
value: [0.72727273 0.54545455 0.59090909 0.72727273 0.68181818 0.61904762
|
|
0.52380952 0.61904762 0.71428571 0.52380952]
|
|
|
|
mean value: 0.6272727272727273
|
|
|
|
key: train_recall
|
|
value: [0.78756477 0.74093264 0.74093264 0.76683938 0.75647668 0.81958763
|
|
0.78350515 0.77319588 0.75773196 0.77319588]
|
|
|
|
mean value: 0.7699962608834998
|
|
|
|
key: test_roc_auc
|
|
value: [0.6017316 0.53463203 0.62878788 0.74458874 0.745671 0.49134199
|
|
0.53463203 0.60497835 0.6525974 0.60281385]
|
|
|
|
mean value: 0.6141774891774892
|
|
|
|
key: train_roc_auc
|
|
value: [0.75976176 0.74675498 0.7493323 0.74682175 0.75194968 0.78285081
|
|
0.74149351 0.76742695 0.75969499 0.75447359]
|
|
|
|
mean value: 0.7560560333315528
|
|
|
|
key: test_jcc
|
|
value: [0.48484848 0.375 0.44827586 0.59259259 0.57692308 0.37142857
|
|
0.35483871 0.43333333 0.5 0.39285714]
|
|
|
|
mean value: 0.45300977737295867
|
|
|
|
key: train_jcc
|
|
value: [0.62040816 0.593361 0.59583333 0.60162602 0.60330579 0.65432099
|
|
0.6031746 0.625 0.6125 0.6122449 ]
|
|
|
|
mean value: 0.61217747826215
|
|
|
|
MCC on Blind test: 0.15
|
|
|
|
Accuracy on Blind test: 0.58
|
|
|
|
Model_name: SVM
|
|
Model func: SVC(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SVC(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02884865 0.03000712 0.03000569 0.0280571 0.0189383 0.02147675
|
|
0.01978326 0.01903105 0.02211714 0.01889801]
|
|
|
|
mean value: 0.023716306686401366
|
|
|
|
key: score_time
|
|
value: [0.01581597 0.01788545 0.0181725 0.01611257 0.01134253 0.0113802
|
|
0.01225686 0.0116148 0.01208067 0.01143336]
|
|
|
|
mean value: 0.013809490203857421
|
|
|
|
key: test_mcc
|
|
value: [0.39479486 0.44701207 0.25541126 0.68193178 0.58134627 0.3071961
|
|
0.68193178 0.50454827 0.4517935 0.5421681 ]
|
|
|
|
mean value: 0.484813397777736
|
|
|
|
key: train_mcc
|
|
value: [0.71577373 0.706524 0.71075971 0.68996555 0.69518417 0.7159805
|
|
0.69120159 0.70564037 0.73129624 0.69518417]
|
|
|
|
mean value: 0.705751002787364
|
|
|
|
key: test_accuracy
|
|
value: [0.69767442 0.72093023 0.62790698 0.8372093 0.79069767 0.65116279
|
|
0.8372093 0.74418605 0.72093023 0.76744186]
|
|
|
|
mean value: 0.7395348837209302
|
|
|
|
key: train_accuracy
|
|
value: [0.85788114 0.85271318 0.85529716 0.84496124 0.84754522 0.85788114
|
|
0.84496124 0.85271318 0.86563307 0.84754522]
|
|
|
|
mean value: 0.8527131782945736
|
|
|
|
key: test_fscore
|
|
value: [0.71111111 0.75 0.63636364 0.82926829 0.8 0.66666667
|
|
0.84444444 0.76595745 0.73913043 0.73684211]
|
|
|
|
mean value: 0.7479784138123062
|
|
|
|
key: train_fscore
|
|
value: [0.85788114 0.848 0.85641026 0.84536082 0.84832905 0.86005089
|
|
0.85 0.85496183 0.86666667 0.84675325]
|
|
|
|
mean value: 0.8534413903012841
|
|
|
|
key: test_precision
|
|
value: [0.69565217 0.69230769 0.63636364 0.89473684 0.7826087 0.625
|
|
0.79166667 0.69230769 0.68 0.82352941]
|
|
|
|
mean value: 0.7314172811080875
|
|
|
|
key: train_precision
|
|
value: [0.8556701 0.87362637 0.84771574 0.84102564 0.84183673 0.84924623
|
|
0.82524272 0.84422111 0.8622449 0.85340314]
|
|
|
|
mean value: 0.8494232682929744
|
|
|
|
key: test_recall
|
|
value: [0.72727273 0.81818182 0.63636364 0.77272727 0.81818182 0.71428571
|
|
0.9047619 0.85714286 0.80952381 0.66666667]
|
|
|
|
mean value: 0.7725108225108225
|
|
|
|
key: train_recall
|
|
value: [0.86010363 0.8238342 0.86528497 0.84974093 0.85492228 0.87113402
|
|
0.87628866 0.86597938 0.87113402 0.84020619]
|
|
|
|
mean value: 0.8578628278403931
|
|
|
|
key: test_roc_auc
|
|
value: [0.6969697 0.71861472 0.62770563 0.83874459 0.79004329 0.6525974
|
|
0.83874459 0.74675325 0.72294372 0.76515152]
|
|
|
|
mean value: 0.7398268398268398
|
|
|
|
key: train_roc_auc
|
|
value: [0.85788687 0.85263875 0.8553229 0.84497356 0.84756423 0.8578468
|
|
0.84488008 0.85267881 0.86561882 0.84756423]
|
|
|
|
mean value: 0.8526975054751349
|
|
|
|
key: test_jcc
|
|
value: [0.55172414 0.6 0.46666667 0.70833333 0.66666667 0.5
|
|
0.73076923 0.62068966 0.5862069 0.58333333]
|
|
|
|
mean value: 0.6014389920424403
|
|
|
|
key: train_jcc
|
|
value: [0.75113122 0.73611111 0.74887892 0.73214286 0.73660714 0.75446429
|
|
0.73913043 0.74666667 0.76470588 0.73423423]
|
|
|
|
mean value: 0.744407276034812
|
|
|
|
MCC on Blind test: 0.45
|
|
|
|
Accuracy on Blind test: 0.73
|
|
|
|
Model_name: MLP
|
|
Model func: MLPClassifier(max_iter=500, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MLPClassifier(max_iter=500, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [2.80615783 2.84482694 3.85962248 3.31184196 2.43806434 3.70860004
|
|
4.42152381 3.72705698 4.61319971 3.57754469]
|
|
|
|
mean value: 3.5308438777923583
|
|
|
|
key: score_time
|
|
value: [0.01527119 0.0216074 0.01274061 0.03064179 0.015028 0.0196197
|
|
0.01770926 0.01731467 0.02244329 0.05502224]
|
|
|
|
mean value: 0.022739815711975097
|
|
|
|
key: test_mcc
|
|
value: [0.44468651 0.44227524 0.40088002 0.58134627 0.62770563 0.2581351
|
|
0.76789769 0.54609991 0.30151915 0.65153277]
|
|
|
|
mean value: 0.5022078281156532
|
|
|
|
key: train_mcc
|
|
value: [0.96904298 0.96393847 0.96383644 0.96899204 0.95870837 0.97932803
|
|
0.96383644 0.96899204 0.97417339 0.95865605]
|
|
|
|
mean value: 0.9669504240144378
|
|
|
|
key: test_accuracy
|
|
value: [0.72093023 0.72093023 0.69767442 0.79069767 0.81395349 0.62790698
|
|
0.88372093 0.76744186 0.65116279 0.81395349]
|
|
|
|
mean value: 0.7488372093023256
|
|
|
|
key: train_accuracy
|
|
value: [0.98449612 0.98191214 0.98191214 0.98449612 0.97932817 0.98966408
|
|
0.98191214 0.98449612 0.9870801 0.97932817]
|
|
|
|
mean value: 0.9834625322997416
|
|
|
|
key: test_fscore
|
|
value: [0.71428571 0.73913043 0.68292683 0.8 0.81818182 0.63636364
|
|
0.87804878 0.7826087 0.63414634 0.77777778]
|
|
|
|
mean value: 0.7463470028263242
|
|
|
|
key: train_fscore
|
|
value: [0.984375 0.98172324 0.98181818 0.98445596 0.97938144 0.98969072
|
|
0.98200514 0.98453608 0.9870801 0.97938144]
|
|
|
|
mean value: 0.9834447313434314
|
|
|
|
key: test_precision
|
|
value: [0.75 0.70833333 0.73684211 0.7826087 0.81818182 0.60869565
|
|
0.9 0.72 0.65 0.93333333]
|
|
|
|
mean value: 0.760799493793773
|
|
|
|
key: train_precision
|
|
value: [0.9895288 0.98947368 0.984375 0.98445596 0.97435897 0.98969072
|
|
0.97948718 0.98453608 0.98963731 0.97938144]
|
|
|
|
mean value: 0.9844925145539584
|
|
|
|
key: test_recall
|
|
value: [0.68181818 0.77272727 0.63636364 0.81818182 0.81818182 0.66666667
|
|
0.85714286 0.85714286 0.61904762 0.66666667]
|
|
|
|
mean value: 0.7393939393939394
|
|
|
|
key: train_recall
|
|
value: [0.97927461 0.97409326 0.97927461 0.98445596 0.98445596 0.98969072
|
|
0.98453608 0.98453608 0.98453608 0.97938144]
|
|
|
|
mean value: 0.9824234816516212
|
|
|
|
key: test_roc_auc
|
|
value: [0.72186147 0.71969697 0.6991342 0.79004329 0.81385281 0.62878788
|
|
0.88311688 0.76948052 0.6504329 0.81060606]
|
|
|
|
mean value: 0.7487012987012986
|
|
|
|
key: train_roc_auc
|
|
value: [0.98448267 0.98189199 0.98190535 0.98449602 0.97934138 0.98966401
|
|
0.98190535 0.98449602 0.98708669 0.97932803]
|
|
|
|
mean value: 0.983459751081673
|
|
|
|
key: test_jcc
|
|
value: [0.55555556 0.5862069 0.51851852 0.66666667 0.69230769 0.46666667
|
|
0.7826087 0.64285714 0.46428571 0.63636364]
|
|
|
|
mean value: 0.6012037185425492
|
|
|
|
key: train_jcc
|
|
value: [0.96923077 0.96410256 0.96428571 0.96938776 0.95959596 0.97959184
|
|
0.96464646 0.96954315 0.9744898 0.95959596]
|
|
|
|
mean value: 0.9674469966420656
|
|
|
|
MCC on Blind test: 0.31
|
|
|
|
Accuracy on Blind test: 0.64
|
|
|
|
Model_name: Decision Tree
|
|
Model func: DecisionTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', DecisionTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.04940581 0.03220487 0.02999425 0.03011823 0.02668381 0.03157115
|
|
0.03161311 0.03097177 0.03061152 0.03121257]
|
|
|
|
mean value: 0.03243870735168457
|
|
|
|
key: score_time
|
|
value: [0.01049972 0.01050186 0.01050282 0.01048779 0.01052856 0.01086879
|
|
0.0111475 0.01051283 0.01061201 0.01057076]
|
|
|
|
mean value: 0.01062326431274414
|
|
|
|
key: test_mcc
|
|
value: [0.49456394 0.30151915 0.35141081 0.53463203 0.91106505 0.36709713
|
|
0.86117339 0.67532468 0.63123793 0.55391636]
|
|
|
|
mean value: 0.5681940463108934
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.74418605 0.65116279 0.6744186 0.76744186 0.95348837 0.6744186
|
|
0.93023256 0.8372093 0.81395349 0.76744186]
|
|
|
|
mean value: 0.7813953488372093
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.73170732 0.66666667 0.66666667 0.77272727 0.95238095 0.70833333
|
|
0.92682927 0.8372093 0.81818182 0.72222222]
|
|
|
|
mean value: 0.7802924819870367
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.78947368 0.65217391 0.7 0.77272727 1. 0.62962963
|
|
0.95 0.81818182 0.7826087 0.86666667]
|
|
|
|
mean value: 0.7961461680111566
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.68181818 0.68181818 0.63636364 0.77272727 0.90909091 0.80952381
|
|
0.9047619 0.85714286 0.85714286 0.61904762]
|
|
|
|
mean value: 0.7729437229437229
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.745671 0.6504329 0.67532468 0.76731602 0.95454545 0.67748918
|
|
0.92965368 0.83766234 0.81493506 0.76406926]
|
|
|
|
mean value: 0.7817099567099567
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.57692308 0.5 0.5 0.62962963 0.90909091 0.5483871
|
|
0.86363636 0.72 0.69230769 0.56521739]
|
|
|
|
mean value: 0.6505192159666213
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.47
|
|
|
|
Accuracy on Blind test: 0.73
|
|
|
|
Model_name: Extra Trees
|
|
Model func: ExtraTreesClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreesClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.1436162 0.14201403 0.1432929 0.14335656 0.14548445 0.14574623
|
|
0.14563632 0.14425778 0.13957262 0.15334868]
|
|
|
|
mean value: 0.14463257789611816
|
|
|
|
key: score_time
|
|
value: [0.02052784 0.02075601 0.02071357 0.02085137 0.02147961 0.02125859
|
|
0.02084136 0.02108717 0.02033615 0.02889132]
|
|
|
|
mean value: 0.021674299240112306
|
|
|
|
key: test_mcc
|
|
value: [0.53796222 0.35185603 0.3961039 0.67532468 0.62770563 0.3071961
|
|
0.58824786 0.65585036 0.39479486 0.49916256]
|
|
|
|
mean value: 0.5034204185252775
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.76744186 0.6744186 0.69767442 0.8372093 0.81395349 0.65116279
|
|
0.79069767 0.81395349 0.69767442 0.74418605]
|
|
|
|
mean value: 0.7488372093023256
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.76190476 0.70833333 0.69767442 0.8372093 0.81818182 0.66666667
|
|
0.8 0.83333333 0.68292683 0.7027027 ]
|
|
|
|
mean value: 0.7508933166321141
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.8 0.65384615 0.71428571 0.85714286 0.81818182 0.625
|
|
0.75 0.74074074 0.7 0.8125 ]
|
|
|
|
mean value: 0.7471697284197284
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.72727273 0.77272727 0.68181818 0.81818182 0.81818182 0.71428571
|
|
0.85714286 0.95238095 0.66666667 0.61904762]
|
|
|
|
mean value: 0.7627705627705628
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.76839827 0.67207792 0.69805195 0.83766234 0.81385281 0.6525974
|
|
0.79220779 0.81709957 0.6969697 0.74134199]
|
|
|
|
mean value: 0.749025974025974
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.61538462 0.5483871 0.53571429 0.72 0.69230769 0.5
|
|
0.66666667 0.71428571 0.51851852 0.54166667]
|
|
|
|
mean value: 0.6052931256318352
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.45
|
|
|
|
Accuracy on Blind test: 0.73
|
|
|
|
Model_name: Extra Tree
|
|
Model func: ExtraTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01237321 0.01223278 0.01213527 0.01217461 0.01231837 0.01222086
|
|
0.01203585 0.01226687 0.01231027 0.01201582]
|
|
|
|
mean value: 0.012208390235900878
|
|
|
|
key: score_time
|
|
value: [0.01035666 0.01029444 0.01033831 0.01035571 0.01038551 0.01044679
|
|
0.01041675 0.01035023 0.01033854 0.01028085]
|
|
|
|
mean value: 0.010356378555297852
|
|
|
|
key: test_mcc
|
|
value: [0.49456394 0.20824344 0.49456394 0.16485939 0.44227524 0.39696419
|
|
0.34848485 0.35141081 0.30666041 0.44155844]
|
|
|
|
mean value: 0.36495846433707946
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.74418605 0.60465116 0.74418605 0.58139535 0.72093023 0.69767442
|
|
0.6744186 0.6744186 0.65116279 0.72093023]
|
|
|
|
mean value: 0.6813953488372093
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.73170732 0.62222222 0.73170732 0.57142857 0.73913043 0.66666667
|
|
0.66666667 0.68181818 0.59459459 0.71428571]
|
|
|
|
mean value: 0.6720227686611568
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.78947368 0.60869565 0.78947368 0.6 0.70833333 0.72222222
|
|
0.66666667 0.65217391 0.6875 0.71428571]
|
|
|
|
mean value: 0.6938824870146381
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.68181818 0.63636364 0.68181818 0.54545455 0.77272727 0.61904762
|
|
0.66666667 0.71428571 0.52380952 0.71428571]
|
|
|
|
mean value: 0.6556277056277056
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.745671 0.6038961 0.745671 0.58225108 0.71969697 0.69588745
|
|
0.67424242 0.67532468 0.6482684 0.72077922]
|
|
|
|
mean value: 0.6811688311688312
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.57692308 0.4516129 0.57692308 0.4 0.5862069 0.5
|
|
0.5 0.51724138 0.42307692 0.55555556]
|
|
|
|
mean value: 0.5087539811566508
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.57
|
|
|
|
Accuracy on Blind test: 0.79
|
|
|
|
Model_name: Random Forest
|
|
Model func: RandomForestClassifier(n_estimators=1000, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(n_estimators=1000, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [2.01366973 1.98927975 2.00896358 1.99248004 2.00078726 1.97842026
|
|
2.0029223 2.10296893 2.33100224 1.73028731]
|
|
|
|
mean value: 2.015078139305115
|
|
|
|
key: score_time
|
|
value: [0.10578775 0.10516572 0.10582972 0.10600066 0.10558033 0.10556436
|
|
0.10616589 0.12238479 0.09266567 0.10008526]
|
|
|
|
mean value: 0.10552301406860351
|
|
|
|
key: test_mcc
|
|
value: [0.54609991 0.55391636 0.67462198 0.55959928 0.76839827 0.30151915
|
|
0.72077922 0.73471273 0.53463203 0.59541363]
|
|
|
|
mean value: 0.5989692558350634
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.76744186 0.76744186 0.8372093 0.76744186 0.88372093 0.65116279
|
|
0.86046512 0.86046512 0.76744186 0.79069767]
|
|
|
|
mean value: 0.7953488372093024
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.75 0.8 0.84444444 0.73684211 0.88372093 0.63414634
|
|
0.85714286 0.86956522 0.76190476 0.75675676]
|
|
|
|
mean value: 0.7894523414599255
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.83333333 0.71428571 0.82608696 0.875 0.9047619 0.65
|
|
0.85714286 0.8 0.76190476 0.875 ]
|
|
|
|
mean value: 0.8097515527950311
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.68181818 0.90909091 0.86363636 0.63636364 0.86363636 0.61904762
|
|
0.85714286 0.95238095 0.76190476 0.66666667]
|
|
|
|
mean value: 0.7811688311688312
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.76948052 0.76406926 0.83658009 0.77056277 0.88419913 0.6504329
|
|
0.86038961 0.86255411 0.76731602 0.78787879]
|
|
|
|
mean value: 0.7953463203463204
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.6 0.66666667 0.73076923 0.58333333 0.79166667 0.46428571
|
|
0.75 0.76923077 0.61538462 0.60869565]
|
|
|
|
mean value: 0.658003264851091
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.57
|
|
|
|
Accuracy on Blind test: 0.79
|
|
|
|
Model_name: Random Forest2
|
|
Model func: RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'Z...05', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10,
|
|
oob_score=True, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.04522681 2.64263201 2.22396159 0.96589065 0.95259833 0.92633867
|
|
0.92470813 0.97479391 0.94780827 0.96445656]
|
|
|
|
mean value: 1.256841492652893
|
|
|
|
key: score_time
|
|
value: [0.19241905 0.23578048 0.18349552 0.27233052 0.15954733 0.20956826
|
|
0.12825727 0.14750695 0.12867522 0.26806164]
|
|
|
|
mean value: 0.19256422519683838
|
|
|
|
key: test_mcc
|
|
value: [0.58824786 0.49916256 0.67532468 0.61748053 0.81385281 0.39696419
|
|
0.81385281 0.65585036 0.62964308 0.63732414]
|
|
|
|
mean value: 0.6327703013890973
|
|
|
|
key: train_mcc
|
|
value: [0.90697612 0.86563218 0.89158365 0.88635453 0.88143837 0.88123732
|
|
0.8914826 0.88630415 0.90702706 0.88630415]
|
|
|
|
mean value: 0.8884340130519578
|
|
|
|
key: test_accuracy
|
|
value: [0.79069767 0.74418605 0.8372093 0.79069767 0.90697674 0.69767442
|
|
0.90697674 0.81395349 0.81395349 0.81395349]
|
|
|
|
mean value: 0.8116279069767441
|
|
|
|
key: train_accuracy
|
|
value: [0.95348837 0.93281654 0.94573643 0.94315245 0.94056848 0.94056848
|
|
0.94573643 0.94315245 0.95348837 0.94315245]
|
|
|
|
mean value: 0.944186046511628
|
|
|
|
key: test_fscore
|
|
value: [0.7804878 0.7755102 0.8372093 0.75675676 0.90909091 0.66666667
|
|
0.9047619 0.83333333 0.8 0.78947368]
|
|
|
|
mean value: 0.805329056610536
|
|
|
|
key: train_fscore
|
|
value: [0.95336788 0.93264249 0.94601542 0.94329897 0.94117647 0.94117647
|
|
0.94601542 0.94329897 0.95336788 0.94329897]
|
|
|
|
mean value: 0.9443658935063983
|
|
|
|
key: test_precision
|
|
value: [0.84210526 0.7037037 0.85714286 0.93333333 0.90909091 0.72222222
|
|
0.9047619 0.74074074 0.84210526 0.88235294]
|
|
|
|
mean value: 0.8337559138487931
|
|
|
|
key: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
train_precision
|
|
value: [0.95336788 0.93264249 0.93877551 0.93846154 0.92929293 0.93401015
|
|
0.94358974 0.94329897 0.95833333 0.94329897]
|
|
|
|
mean value: 0.9415071508004521
|
|
|
|
key: test_recall
|
|
value: [0.72727273 0.86363636 0.81818182 0.63636364 0.90909091 0.61904762
|
|
0.9047619 0.95238095 0.76190476 0.71428571]
|
|
|
|
mean value: 0.7906926406926407
|
|
|
|
key: train_recall
|
|
value: [0.95336788 0.93264249 0.95336788 0.94818653 0.95336788 0.94845361
|
|
0.94845361 0.94329897 0.94845361 0.94329897]
|
|
|
|
mean value: 0.9472891405373645
|
|
|
|
key: test_roc_auc
|
|
value: [0.79220779 0.74134199 0.83766234 0.79437229 0.90692641 0.69588745
|
|
0.90692641 0.81709957 0.81277056 0.81168831]
|
|
|
|
mean value: 0.8116883116883117
|
|
|
|
key: train_roc_auc
|
|
value: [0.95348806 0.93281609 0.9457561 0.94316543 0.94060146 0.94054805
|
|
0.94572939 0.94315208 0.95350142 0.94315208]
|
|
|
|
mean value: 0.9441910154372095
|
|
|
|
key: test_jcc
|
|
value: [0.64 0.63333333 0.72 0.60869565 0.83333333 0.5
|
|
0.82608696 0.71428571 0.66666667 0.65217391]
|
|
|
|
mean value: 0.6794575569358178
|
|
|
|
key: train_jcc
|
|
value: [0.91089109 0.87378641 0.89756098 0.89268293 0.88888889 0.88888889
|
|
0.89756098 0.89268293 0.91089109 0.89268293]
|
|
|
|
mean value: 0.8946517095469907
|
|
|
|
MCC on Blind test: 0.57
|
|
|
|
Accuracy on Blind test: 0.79
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.02952361 0.01485586 0.02655649 0.01511598 0.01541734 0.01484561
|
|
0.0150702 0.01474094 0.0148046 0.01679945]
|
|
|
|
mean value: 0.017773008346557616
|
|
|
|
key: score_time
|
|
value: [0.02834201 0.01304078 0.01354599 0.0139358 0.01270437 0.01640105
|
|
0.01323628 0.01268053 0.01396489 0.01378512]
|
|
|
|
mean value: 0.015163683891296386
|
|
|
|
key: test_mcc
|
|
value: [0.48917749 0.06753957 0.20995671 0.50454827 0.39696419 0.3030303
|
|
0.72451364 0.36709713 0.36709713 0.34859132]
|
|
|
|
mean value: 0.3778515749103558
|
|
|
|
key: train_mcc
|
|
value: [0.46826734 0.51459683 0.46884804 0.49038014 0.49958596 0.51988165
|
|
0.4373134 0.47321307 0.45779106 0.48339175]
|
|
|
|
mean value: 0.48132692433520785
|
|
|
|
key: test_accuracy
|
|
value: [0.74418605 0.53488372 0.60465116 0.74418605 0.69767442 0.65116279
|
|
0.86046512 0.6744186 0.6744186 0.6744186 ]
|
|
|
|
mean value: 0.686046511627907
|
|
|
|
key: train_accuracy
|
|
value: [0.73385013 0.75710594 0.73385013 0.74418605 0.74935401 0.75968992
|
|
0.71834625 0.73643411 0.72868217 0.74160207]
|
|
|
|
mean value: 0.7403100775193798
|
|
|
|
key: test_fscore
|
|
value: [0.74418605 0.56521739 0.60465116 0.71794872 0.72340426 0.65116279
|
|
0.86363636 0.70833333 0.70833333 0.65 ]
|
|
|
|
mean value: 0.6936873394875245
|
|
|
|
key: train_fscore
|
|
value: [0.73924051 0.75132275 0.74185464 0.75434243 0.75566751 0.76574307
|
|
0.72681704 0.74242424 0.73551637 0.74619289]
|
|
|
|
mean value: 0.7459121456577962
|
|
|
|
key: test_precision
|
|
value: [0.76190476 0.54166667 0.61904762 0.82352941 0.68 0.63636364
|
|
0.82608696 0.62962963 0.62962963 0.68421053]
|
|
|
|
mean value: 0.6832068837844177
|
|
|
|
key: train_precision
|
|
value: [0.72277228 0.76756757 0.7184466 0.72380952 0.73529412 0.74876847
|
|
0.70731707 0.72772277 0.71921182 0.735 ]
|
|
|
|
mean value: 0.7305910229208082
|
|
|
|
key: test_recall
|
|
value: [0.72727273 0.59090909 0.59090909 0.63636364 0.77272727 0.66666667
|
|
0.9047619 0.80952381 0.80952381 0.61904762]
|
|
|
|
mean value: 0.7127705627705627
|
|
|
|
key: train_recall
|
|
value: [0.75647668 0.7357513 0.76683938 0.78756477 0.77720207 0.78350515
|
|
0.74742268 0.75773196 0.75257732 0.75773196]
|
|
|
|
mean value: 0.762280326905614
|
|
|
|
key: test_roc_auc
|
|
value: [0.74458874 0.53354978 0.60497835 0.74675325 0.69588745 0.65151515
|
|
0.86147186 0.67748918 0.67748918 0.67316017]
|
|
|
|
mean value: 0.6866883116883117
|
|
|
|
key: train_roc_auc
|
|
value: [0.73390845 0.75705091 0.73393515 0.74429785 0.74942578 0.75962822
|
|
0.71827093 0.73637893 0.72862027 0.74156028]
|
|
|
|
mean value: 0.7403076758720154
|
|
|
|
key: test_jcc
|
|
value: [0.59259259 0.39393939 0.43333333 0.56 0.56666667 0.48275862
|
|
0.76 0.5483871 0.5483871 0.48148148]
|
|
|
|
mean value: 0.536754628225151
|
|
|
|
key: train_jcc
|
|
value: [0.58634538 0.60169492 0.58964143 0.60557769 0.60728745 0.62040816
|
|
0.57086614 0.59036145 0.58167331 0.5951417 ]
|
|
|
|
mean value: 0.5948997627637519
|
|
|
|
MCC on Blind test: 0.48
|
|
|
|
Accuracy on Blind test: 0.73
|
|
|
|
Model_name: XGBoost
|
|
Model func: XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'Z...
|
|
interaction_constraints=None, learning_rate=None,
|
|
max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan,
|
|
monotone_constraints=None, n_estimators=100,
|
|
n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None,
|
|
reg_lambda=None, scale_pos_weight=None,
|
|
subsample=None, tree_method=None,
|
|
use_label_encoder=False,
|
|
validate_parameters=None, verbosity=0))])
|
|
|
|
key: fit_time
|
|
value: [9.16078043 7.99839926 6.91333866 8.29607415 7.58446097 6.60297179
|
|
2.58368206 3.86609006 6.45868564 7.47924662]
|
|
|
|
mean value: 6.694372963905335
|
|
|
|
key: score_time
|
|
value: [0.04491019 0.0193634 0.02580667 0.02789092 0.0248673 0.03136349
|
|
0.01814866 0.02837729 0.02448654 0.02403283]
|
|
|
|
mean value: 0.026924729347229004
|
|
|
|
key: test_mcc
|
|
value: [0.63123793 0.58134627 0.68193178 0.58824786 1. 0.53463203
|
|
0.9544491 0.72451364 0.62964308 0.77418983]
|
|
|
|
mean value: 0.7100191509900233
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.81395349 0.79069767 0.8372093 0.79069767 1. 0.76744186
|
|
0.97674419 0.86046512 0.81395349 0.88372093]
|
|
|
|
mean value: 0.8534883720930233
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.80952381 0.8 0.82926829 0.7804878 1. 0.76190476
|
|
0.97560976 0.86363636 0.8 0.87179487]
|
|
|
|
mean value: 0.8492225660518343
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.85 0.7826087 0.89473684 0.84210526 1. 0.76190476
|
|
1. 0.82608696 0.84210526 0.94444444]
|
|
|
|
mean value: 0.8743992226944172
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.77272727 0.81818182 0.77272727 0.72727273 1. 0.76190476
|
|
0.95238095 0.9047619 0.76190476 0.80952381]
|
|
|
|
mean value: 0.8281385281385282
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.81493506 0.79004329 0.83874459 0.79220779 1. 0.76731602
|
|
0.97619048 0.86147186 0.81277056 0.88203463]
|
|
|
|
mean value: 0.8535714285714285
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.68 0.66666667 0.70833333 0.64 1. 0.61538462
|
|
0.95238095 0.76 0.66666667 0.77272727]
|
|
|
|
mean value: 0.7462159507159507
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.64
|
|
|
|
Accuracy on Blind test: 0.82
|
|
|
|
Model_name: LDA
|
|
Model func: LinearDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LinearDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.09778094 0.12179923 0.07625556 0.07825637 0.16504908 0.09413481
|
|
0.17084694 0.09448075 0.09310031 0.12652278]
|
|
|
|
mean value: 0.11182267665863037
|
|
|
|
key: score_time
|
|
value: [0.03333974 0.05212307 0.01235008 0.02842474 0.04834318 0.03797722
|
|
0.03660226 0.02439833 0.02313018 0.03546667]
|
|
|
|
mean value: 0.03321554660797119
|
|
|
|
key: test_mcc
|
|
value: [0.44155844 0.16122349 0.21351219 0.72077922 0.53595916 0.16233766
|
|
0.67532468 0.50454827 0.34859132 0.4912706 ]
|
|
|
|
mean value: 0.4255105029586544
|
|
|
|
key: train_mcc
|
|
value: [0.78812563 0.80377755 0.81399076 0.77859243 0.75711768 0.86605933
|
|
0.81967357 0.76242255 0.81913359 0.77778965]
|
|
|
|
mean value: 0.7986682745861051
|
|
|
|
key: test_accuracy
|
|
value: [0.72093023 0.58139535 0.60465116 0.86046512 0.76744186 0.58139535
|
|
0.8372093 0.74418605 0.6744186 0.74418605]
|
|
|
|
mean value: 0.7116279069767442
|
|
|
|
key: train_accuracy
|
|
value: [0.89405685 0.90180879 0.90697674 0.88888889 0.87855297 0.93281654
|
|
0.90956072 0.88113695 0.90956072 0.88888889]
|
|
|
|
mean value: 0.8992248062015504
|
|
|
|
key: test_fscore
|
|
value: [0.72727273 0.60869565 0.58536585 0.86363636 0.7826087 0.57142857
|
|
0.8372093 0.76595745 0.65 0.71794872]
|
|
|
|
mean value: 0.7110123330905096
|
|
|
|
key: train_fscore
|
|
value: [0.89405685 0.90052356 0.90625 0.88594164 0.87855297 0.93193717
|
|
0.90813648 0.88265306 0.90956072 0.88888889]
|
|
|
|
mean value: 0.8986501353235298
|
|
|
|
key: test_precision
|
|
value: [0.72727273 0.58333333 0.63157895 0.86363636 0.75 0.57142857
|
|
0.81818182 0.69230769 0.68421053 0.77777778]
|
|
|
|
mean value: 0.7099727757622495
|
|
|
|
key: train_precision
|
|
value: [0.89175258 0.91005291 0.91099476 0.9076087 0.87628866 0.94680851
|
|
0.92513369 0.87373737 0.9119171 0.89119171]
|
|
|
|
mean value: 0.9045485989721791
|
|
|
|
key: test_recall
|
|
value: [0.72727273 0.63636364 0.54545455 0.86363636 0.81818182 0.57142857
|
|
0.85714286 0.85714286 0.61904762 0.66666667]
|
|
|
|
mean value: 0.7162337662337662
|
|
|
|
key: train_recall
|
|
value: [0.89637306 0.89119171 0.9015544 0.86528497 0.88082902 0.91752577
|
|
0.89175258 0.89175258 0.90721649 0.88659794]
|
|
|
|
mean value: 0.8930078521446504
|
|
|
|
key: test_roc_auc
|
|
value: [0.72077922 0.58008658 0.60606061 0.86038961 0.76623377 0.58116883
|
|
0.83766234 0.74675325 0.67316017 0.74242424]
|
|
|
|
mean value: 0.7114718614718615
|
|
|
|
key: train_roc_auc
|
|
value: [0.89406282 0.90178142 0.90696277 0.88882805 0.87855884 0.93285615
|
|
0.90960686 0.88110945 0.9095668 0.88889482]
|
|
|
|
mean value: 0.8992227979274612
|
|
|
|
key: test_jcc
|
|
value: [0.57142857 0.4375 0.4137931 0.76 0.64285714 0.4
|
|
0.72 0.62068966 0.48148148 0.56 ]
|
|
|
|
mean value: 0.5607749954387885
|
|
|
|
key: train_jcc
|
|
value: [0.80841121 0.81904762 0.82857143 0.7952381 0.78341014 0.87254902
|
|
0.83173077 0.78995434 0.83412322 0.8 ]
|
|
|
|
mean value: 0.8163035845546233
|
|
|
|
MCC on Blind test: 0.4
|
|
|
|
Accuracy on Blind test: 0.7
|
|
|
|
Model_name: Multinomial
|
|
Model func: MultinomialNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MultinomialNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01479411 0.01501846 0.01490593 0.01448727 0.0149529 0.01491094
|
|
0.01492023 0.01497388 0.01489806 0.01489973]
|
|
|
|
mean value: 0.014876151084899902
|
|
|
|
key: score_time
|
|
value: [0.0127027 0.01303649 0.01297402 0.01274824 0.01299381 0.01302719
|
|
0.01308703 0.0130887 0.01266432 0.01305079]
|
|
|
|
mean value: 0.012937331199645996
|
|
|
|
key: test_mcc
|
|
value: [ 0.68193178 -0.03178209 0.53463203 0.49456394 0.35868355 0.44155844
|
|
0.73471273 0.29669666 0.40939224 0.20995671]
|
|
|
|
mean value: 0.4130345986091948
|
|
|
|
key: train_mcc
|
|
value: [0.39290214 0.48160516 0.42033642 0.41972722 0.41370285 0.42376414
|
|
0.39848498 0.45362978 0.44905556 0.4451171 ]
|
|
|
|
mean value: 0.42983253533526955
|
|
|
|
key: test_accuracy
|
|
value: [0.8372093 0.48837209 0.76744186 0.74418605 0.6744186 0.72093023
|
|
0.86046512 0.62790698 0.69767442 0.60465116]
|
|
|
|
mean value: 0.7023255813953488
|
|
|
|
key: train_accuracy
|
|
value: [0.69509044 0.73901809 0.70801034 0.70801034 0.70542636 0.71059432
|
|
0.69767442 0.72609819 0.72351421 0.72093023]
|
|
|
|
mean value: 0.7134366925064599
|
|
|
|
key: test_fscore
|
|
value: [0.82926829 0.56 0.77272727 0.73170732 0.72 0.71428571
|
|
0.86956522 0.69230769 0.72340426 0.60465116]
|
|
|
|
mean value: 0.7217916924577928
|
|
|
|
key: train_fscore
|
|
value: [0.71078431 0.75305623 0.72639225 0.72506083 0.72058824 0.72682927
|
|
0.71670702 0.73762376 0.73710074 0.73786408]
|
|
|
|
mean value: 0.7292006730036351
|
|
|
|
key: test_precision
|
|
value: [0.89473684 0.5 0.77272727 0.78947368 0.64285714 0.71428571
|
|
0.8 0.58064516 0.65384615 0.59090909]
|
|
|
|
mean value: 0.6939481062231487
|
|
|
|
key: train_precision
|
|
value: [0.6744186 0.71296296 0.68181818 0.68348624 0.68372093 0.68981481
|
|
0.67579909 0.70952381 0.70422535 0.69724771]
|
|
|
|
mean value: 0.6913017687828286
|
|
|
|
key: test_recall
|
|
value: [0.77272727 0.63636364 0.77272727 0.68181818 0.81818182 0.71428571
|
|
0.95238095 0.85714286 0.80952381 0.61904762]
|
|
|
|
mean value: 0.7634199134199134
|
|
|
|
key: train_recall
|
|
value: [0.75129534 0.79792746 0.77720207 0.77202073 0.76165803 0.76804124
|
|
0.7628866 0.76804124 0.77319588 0.78350515]
|
|
|
|
mean value: 0.7715773730035789
|
|
|
|
key: test_roc_auc
|
|
value: [0.83874459 0.48484848 0.76731602 0.745671 0.67099567 0.72077922
|
|
0.86255411 0.63311688 0.70021645 0.60497835]
|
|
|
|
mean value: 0.702922077922078
|
|
|
|
key: train_roc_auc
|
|
value: [0.6952353 0.73916992 0.70818867 0.70817531 0.70557128 0.71044549
|
|
0.69750548 0.72598953 0.7233855 0.72076812]
|
|
|
|
mean value: 0.7134434592169222
|
|
|
|
key: test_jcc
|
|
value: [0.70833333 0.38888889 0.62962963 0.57692308 0.5625 0.55555556
|
|
0.76923077 0.52941176 0.56666667 0.43333333]
|
|
|
|
mean value: 0.5720473018267136
|
|
|
|
key: train_jcc
|
|
value: [0.5513308 0.60392157 0.57034221 0.56870229 0.56321839 0.57088123
|
|
0.55849057 0.58431373 0.58365759 0.58461538]
|
|
|
|
mean value: 0.573947374305626
|
|
|
|
MCC on Blind test: 0.32
|
|
|
|
Accuracy on Blind test: 0.67
|
|
|
|
Model_name: Passive Aggresive
|
|
Model func: PassiveAggressiveClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
PassiveAggressiveClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01928735 0.01799226 0.04592514 0.04086757 0.03325629 0.02434087
|
|
0.04326963 0.03842497 0.02026701 0.04748487]
|
|
|
|
mean value: 0.03311159610748291
|
|
|
|
key: score_time
|
|
value: [0.01329494 0.01605535 0.01757121 0.02583075 0.0133481 0.02770424
|
|
0.02780008 0.02710891 0.0231266 0.03068089]
|
|
|
|
mean value: 0.022252106666564943
|
|
|
|
key: test_mcc
|
|
value: [0.43082022 0.27790255 0.36986766 0.62964308 0.369787 0.25490741
|
|
0.60786632 0.39343507 0.15272164 0.63732414]
|
|
|
|
mean value: 0.4124275077176357
|
|
|
|
key: train_mcc
|
|
value: [0.51104387 0.53280469 0.5164767 0.72190175 0.52516542 0.78363736
|
|
0.47848443 0.60509569 0.43790144 0.72125289]
|
|
|
|
mean value: 0.5833764240666351
|
|
|
|
key: test_accuracy
|
|
value: [0.65116279 0.60465116 0.6744186 0.81395349 0.65116279 0.62790698
|
|
0.76744186 0.62790698 0.53488372 0.81395349]
|
|
|
|
mean value: 0.6767441860465117
|
|
|
|
key: train_accuracy
|
|
value: [0.71317829 0.72351421 0.71834625 0.85788114 0.71576227 0.89147287
|
|
0.68992248 0.78036176 0.66149871 0.86046512]
|
|
|
|
mean value: 0.7612403100775194
|
|
|
|
key: test_fscore
|
|
value: [0.48275862 0.71186441 0.73076923 0.82608696 0.73684211 0.6
|
|
0.80769231 0.72413793 0.66666667 0.78947368]
|
|
|
|
mean value: 0.7076291909627427
|
|
|
|
key: train_fscore
|
|
value: [0.60215054 0.78207739 0.77709611 0.86618005 0.77822581 0.88947368
|
|
0.76284585 0.81561822 0.74759152 0.86294416]
|
|
|
|
mean value: 0.7884203340208182
|
|
|
|
key: test_precision
|
|
value: [1. 0.56756757 0.63333333 0.79166667 0.6 0.63157895
|
|
0.67741935 0.56756757 0.51282051 0.88235294]
|
|
|
|
mean value: 0.6864306891339249
|
|
|
|
key: train_precision
|
|
value: [0.97674419 0.6442953 0.64189189 0.81651376 0.6369637 0.90860215
|
|
0.61858974 0.70411985 0.59692308 0.85 ]
|
|
|
|
mean value: 0.7394643659027074
|
|
|
|
key: test_recall
|
|
value: [0.31818182 0.95454545 0.86363636 0.86363636 0.95454545 0.57142857
|
|
1. 1. 0.95238095 0.71428571]
|
|
|
|
mean value: 0.8192640692640693
|
|
|
|
key: train_recall
|
|
value: [0.43523316 0.99481865 0.98445596 0.92227979 1. 0.87113402
|
|
0.99484536 0.96907216 1. 0.87628866]
|
|
|
|
mean value: 0.9048127770952407
|
|
|
|
key: test_roc_auc
|
|
value: [0.65909091 0.59632035 0.66991342 0.81277056 0.64393939 0.62662338
|
|
0.77272727 0.63636364 0.54437229 0.81168831]
|
|
|
|
mean value: 0.6773809523809524
|
|
|
|
key: train_roc_auc
|
|
value: [0.71246194 0.72421345 0.7190321 0.85804711 0.71649485 0.89152556
|
|
0.68913252 0.77987287 0.66062176 0.86042412]
|
|
|
|
mean value: 0.7611826291330591
|
|
|
|
key: test_jcc
|
|
value: [0.31818182 0.55263158 0.57575758 0.7037037 0.58333333 0.42857143
|
|
0.67741935 0.56756757 0.5 0.65217391]
|
|
|
|
mean value: 0.5559340273944984
|
|
|
|
key: train_jcc
|
|
value: [0.43076923 0.64214047 0.63545151 0.7639485 0.6369637 0.80094787
|
|
0.61661342 0.68864469 0.59692308 0.75892857]
|
|
|
|
mean value: 0.6571331021062359
|
|
|
|
MCC on Blind test: 0.23
|
|
|
|
Accuracy on Blind test: 0.58
|
|
|
|
Model_name: Stochastic GDescent
|
|
Model func: SGDClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SGDClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.03413296 0.01754665 0.02072024 0.05363464 0.04663396 0.03693771
|
|
0.04613781 0.04253745 0.0207994 0.01882339]
|
|
|
|
mean value: 0.033790421485900876
|
|
|
|
key: score_time
|
|
value: [0.01302671 0.01284456 0.01339936 0.0210731 0.02140474 0.02065182
|
|
0.01973844 0.02583551 0.0124917 0.01213956]
|
|
|
|
mean value: 0.01726055145263672
|
|
|
|
key: test_mcc
|
|
value: [0.32463131 0.21473308 0.48807056 0.59541363 0.57954841 0.34848485
|
|
0.72077922 0.50266669 0.21351219 0.65153277]
|
|
|
|
mean value: 0.46393727002357993
|
|
|
|
key: train_mcc
|
|
value: [0.71514737 0.71603467 0.76258185 0.69513224 0.60480936 0.71916537
|
|
0.69587799 0.67108207 0.71601841 0.6521176 ]
|
|
|
|
mean value: 0.6947966921662997
|
|
|
|
key: test_accuracy
|
|
value: [0.65116279 0.60465116 0.74418605 0.79069767 0.76744186 0.6744186
|
|
0.86046512 0.69767442 0.60465116 0.81395349]
|
|
|
|
mean value: 0.7209302325581395
|
|
|
|
key: train_accuracy
|
|
value: [0.85529716 0.85271318 0.87855297 0.83204134 0.77260982 0.85788114
|
|
0.84754522 0.81912145 0.85788114 0.81395349]
|
|
|
|
mean value: 0.8387596899224806
|
|
|
|
key: test_fscore
|
|
value: [0.59459459 0.66666667 0.75555556 0.81632653 0.72222222 0.66666667
|
|
0.85714286 0.76363636 0.62222222 0.77777778]
|
|
|
|
mean value: 0.7242811457097171
|
|
|
|
key: train_fscore
|
|
value: [0.84615385 0.86396181 0.88508557 0.85327314 0.70860927 0.86486486
|
|
0.84432718 0.84375 0.85639687 0.78571429]
|
|
|
|
mean value: 0.8352136837990035
|
|
|
|
key: test_precision
|
|
value: [0.73333333 0.5862069 0.73913043 0.74074074 0.92857143 0.66666667
|
|
0.85714286 0.61764706 0.58333333 0.93333333]
|
|
|
|
mean value: 0.7386106083279556
|
|
|
|
key: train_precision
|
|
value: [0.9005848 0.80088496 0.83796296 0.756 0.98165138 0.82629108
|
|
0.86486486 0.74409449 0.86772487 0.92957746]
|
|
|
|
mean value: 0.850963685556325
|
|
|
|
key: test_recall
|
|
value: [0.5 0.77272727 0.77272727 0.90909091 0.59090909 0.66666667
|
|
0.85714286 1. 0.66666667 0.66666667]
|
|
|
|
mean value: 0.7402597402597403
|
|
|
|
key: train_recall
|
|
value: [0.79792746 0.93782383 0.93782383 0.97927461 0.55440415 0.90721649
|
|
0.82474227 0.9742268 0.84536082 0.68041237]
|
|
|
|
mean value: 0.8439212648896961
|
|
|
|
key: test_roc_auc
|
|
value: [0.6547619 0.60064935 0.74350649 0.78787879 0.77164502 0.67424242
|
|
0.86038961 0.70454545 0.60606061 0.81060606]
|
|
|
|
mean value: 0.7214285714285714
|
|
|
|
key: train_roc_auc
|
|
value: [0.8551493 0.85293254 0.87870573 0.83242081 0.77204743 0.85775333
|
|
0.84760429 0.81871962 0.85791357 0.81429945]
|
|
|
|
mean value: 0.8387546071256877
|
|
|
|
key: test_jcc
|
|
value: [0.42307692 0.5 0.60714286 0.68965517 0.56521739 0.5
|
|
0.75 0.61764706 0.4516129 0.63636364]
|
|
|
|
mean value: 0.5740715942350894
|
|
|
|
key: train_jcc
|
|
value: [0.73333333 0.7605042 0.79385965 0.74409449 0.54871795 0.76190476
|
|
0.73059361 0.72972973 0.74885845 0.64705882]
|
|
|
|
mean value: 0.7198654991002161
|
|
|
|
MCC on Blind test: 0.33
|
|
|
|
Accuracy on Blind test: 0.67
|
|
|
|
Model_name: AdaBoost Classifier
|
|
Model func: AdaBoostClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', AdaBoostClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.21367502 0.21543741 0.25512004 0.2126348 0.27796173 0.21113563
|
|
0.21479535 0.21336484 0.21369362 0.21332502]
|
|
|
|
mean value: 0.22411434650421141
|
|
|
|
key: score_time
|
|
value: [0.02114868 0.02118158 0.02099514 0.02095175 0.02381182 0.02107692
|
|
0.02110982 0.02105737 0.02100515 0.02121067]
|
|
|
|
mean value: 0.02135488986968994
|
|
|
|
key: test_mcc
|
|
value: [0.3961039 0.48917749 0.64040632 0.58824786 0.82901914 0.3961039
|
|
0.81385281 0.65585036 0.44227524 0.73248017]
|
|
|
|
mean value: 0.5983517174178976
|
|
|
|
key: train_mcc
|
|
value: [0.95870837 0.92259409 0.9329309 0.95865605 0.92769958 0.94316543
|
|
0.93803254 0.94326318 0.96383644 0.94316391]
|
|
|
|
mean value: 0.9432050491425249
|
|
|
|
key: test_accuracy
|
|
value: [0.69767442 0.74418605 0.81395349 0.79069767 0.90697674 0.69767442
|
|
0.90697674 0.81395349 0.72093023 0.86046512]
|
|
|
|
mean value: 0.7953488372093023
|
|
|
|
key: train_accuracy
|
|
value: [0.97932817 0.96124031 0.96640827 0.97932817 0.96382429 0.97157623
|
|
0.96899225 0.97157623 0.98191214 0.97157623]
|
|
|
|
mean value: 0.9715762273901809
|
|
|
|
key: test_fscore
|
|
value: [0.69767442 0.74418605 0.8 0.7804878 0.9 0.69767442
|
|
0.9047619 0.83333333 0.7 0.84210526]
|
|
|
|
mean value: 0.7900223189852111
|
|
|
|
key: train_fscore
|
|
value: [0.97938144 0.96143959 0.96658098 0.97927461 0.96391753 0.97157623
|
|
0.96923077 0.97186701 0.98200514 0.97172237]
|
|
|
|
mean value: 0.9716995656744147
|
|
|
|
key: test_precision
|
|
value: [0.71428571 0.76190476 0.88888889 0.84210526 1. 0.68181818
|
|
0.9047619 0.74074074 0.73684211 0.94117647]
|
|
|
|
mean value: 0.821252403140948
|
|
|
|
key: train_precision
|
|
value: [0.97435897 0.95408163 0.95918367 0.97927461 0.95897436 0.97409326
|
|
0.96428571 0.96446701 0.97948718 0.96923077]
|
|
|
|
mean value: 0.9677437183183256
|
|
|
|
key: test_recall
|
|
value: [0.68181818 0.72727273 0.72727273 0.72727273 0.81818182 0.71428571
|
|
0.9047619 0.95238095 0.66666667 0.76190476]
|
|
|
|
mean value: 0.7681818181818182
|
|
|
|
key: train_recall
|
|
value: [0.98445596 0.96891192 0.97409326 0.97927461 0.96891192 0.96907216
|
|
0.9742268 0.97938144 0.98453608 0.9742268 ]
|
|
|
|
mean value: 0.9757090967362855
|
|
|
|
key: test_roc_auc
|
|
value: [0.69805195 0.74458874 0.81601732 0.79220779 0.90909091 0.69805195
|
|
0.90692641 0.81709957 0.71969697 0.85822511]
|
|
|
|
mean value: 0.795995670995671
|
|
|
|
key: train_roc_auc
|
|
value: [0.97934138 0.96126008 0.96642808 0.97932803 0.9638374 0.97158271
|
|
0.96897869 0.97155601 0.98190535 0.97156936]
|
|
|
|
mean value: 0.971578708402329
|
|
|
|
key: test_jcc
|
|
value: [0.53571429 0.59259259 0.66666667 0.64 0.81818182 0.53571429
|
|
0.82608696 0.71428571 0.53846154 0.72727273]
|
|
|
|
mean value: 0.6594976585411368
|
|
|
|
key: train_jcc
|
|
value: [0.95959596 0.92574257 0.93532338 0.95939086 0.93034826 0.94472362
|
|
0.94029851 0.94527363 0.96464646 0.945 ]
|
|
|
|
mean value: 0.9450343260628992
|
|
|
|
MCC on Blind test: 0.45
|
|
|
|
Accuracy on Blind test: 0.73
|
|
|
|
Model_name: Bagging Classifier
|
|
Model func: BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
BaggingClassifier(n_jobs=10, oob_score=True,
|
|
random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.08050752 0.08281493 0.08780646 0.12561631 0.10441041 0.08479404
|
|
0.14258456 0.13446379 0.12838745 0.14794612]
|
|
|
|
mean value: 0.11193315982818604
|
|
|
|
key: score_time
|
|
value: [0.02324915 0.02340794 0.03211308 0.02946973 0.02444458 0.02654386
|
|
0.09909654 0.02820015 0.02767539 0.02461576]
|
|
|
|
mean value: 0.033881616592407224
|
|
|
|
key: test_mcc
|
|
value: [0.51986413 0.20995671 0.58824786 0.59970431 0.86929961 0.5421681
|
|
0.86117339 0.67462198 0.58134627 0.723327 ]
|
|
|
|
mean value: 0.6169709354435585
|
|
|
|
key: train_mcc
|
|
value: [0.96393847 0.96445208 0.94912625 0.95350142 0.96945581 0.98461498
|
|
0.94418052 0.9485255 0.93818785 0.96414836]
|
|
|
|
mean value: 0.958013121376555
|
|
|
|
key: test_accuracy
|
|
value: [0.74418605 0.60465116 0.79069767 0.79069767 0.93023256 0.76744186
|
|
0.93023256 0.8372093 0.79069767 0.86046512]
|
|
|
|
mean value: 0.8046511627906977
|
|
|
|
key: train_accuracy
|
|
value: [0.98191214 0.98191214 0.97416021 0.97674419 0.98449612 0.99224806
|
|
0.97157623 0.97416021 0.96899225 0.98191214]
|
|
|
|
mean value: 0.9788113695090439
|
|
|
|
key: test_fscore
|
|
value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
[0.7027027 0.60465116 0.7804878 0.76923077 0.92682927 0.73684211
|
|
0.92682927 0.82926829 0.7804878 0.85 ]
|
|
|
|
mean value: 0.7907329179011718
|
|
|
|
key: train_fscore
|
|
value: [0.98172324 0.98153034 0.97354497 0.97674419 0.98421053 0.99220779
|
|
0.97097625 0.97395833 0.96875 0.98172324]
|
|
|
|
mean value: 0.9785368882950292
|
|
|
|
key: test_precision
|
|
value: [0.86666667 0.61904762 0.84210526 0.88235294 1. 0.82352941
|
|
0.95 0.85 0.8 0.89473684]
|
|
|
|
mean value: 0.852843874391862
|
|
|
|
key: train_precision
|
|
value: [0.98947368 1. 0.99459459 0.9742268 1. 1.
|
|
0.99459459 0.98421053 0.97894737 0.99470899]
|
|
|
|
mean value: 0.9910756566969263
|
|
|
|
key: test_recall
|
|
value: [0.59090909 0.59090909 0.72727273 0.68181818 0.86363636 0.66666667
|
|
0.9047619 0.80952381 0.76190476 0.80952381]
|
|
|
|
mean value: 0.7406926406926407
|
|
|
|
key: train_recall
|
|
value: [0.97409326 0.96373057 0.95336788 0.97927461 0.96891192 0.98453608
|
|
0.94845361 0.96391753 0.95876289 0.96907216]
|
|
|
|
mean value: 0.9664120506383206
|
|
|
|
key: test_roc_auc
|
|
value: [0.7478355 0.60497835 0.79220779 0.79329004 0.93181818 0.76515152
|
|
0.92965368 0.83658009 0.79004329 0.85930736]
|
|
|
|
mean value: 0.8050865800865801
|
|
|
|
key: train_roc_auc
|
|
value: [0.98189199 0.98186528 0.97410662 0.97675071 0.98445596 0.99226804
|
|
0.97163613 0.97418674 0.96901875 0.98194541]
|
|
|
|
mean value: 0.9788125634314406
|
|
|
|
key: test_jcc
|
|
value: [0.54166667 0.43333333 0.64 0.625 0.86363636 0.58333333
|
|
0.86363636 0.70833333 0.64 0.73913043]
|
|
|
|
mean value: 0.6638069828722003
|
|
|
|
key: train_jcc
|
|
value: [0.96410256 0.96373057 0.94845361 0.95454545 0.96891192 0.98453608
|
|
0.94358974 0.94923858 0.93939394 0.96410256]
|
|
|
|
mean value: 0.9580605022182751
|
|
|
|
MCC on Blind test: 0.54
|
|
|
|
Accuracy on Blind test: 0.76
|
|
|
|
Model_name: Gaussian Process
|
|
Model func: GaussianProcessClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianProcessClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.23403645 0.16865182 0.19663453 0.22662783 0.18694973 0.22256374
|
|
0.20538998 0.18926692 0.21955013 0.21190357]
|
|
|
|
mean value: 0.20615746974945068
|
|
|
|
key: score_time
|
|
value: [0.0319984 0.0327487 0.03266263 0.03267264 0.03217125 0.03329086
|
|
0.03301549 0.03223681 0.03828764 0.04429054]
|
|
|
|
mean value: 0.03433749675750732
|
|
|
|
key: test_mcc
|
|
value: [0.3961039 0.2567 0.53463203 0.44468651 0.58225108 0.0287681
|
|
0.44155844 0.30151915 0.25541126 0.45629995]
|
|
|
|
mean value: 0.3697930425744557
|
|
|
|
key: train_mcc
|
|
value: [0.96919751 0.96393847 0.96919751 0.97937979 0.97427611 0.97417339
|
|
0.97932803 0.97427816 0.96920078 0.97427816]
|
|
|
|
mean value: 0.9727247919177897
|
|
|
|
key: test_accuracy
|
|
value: [0.69767442 0.62790698 0.76744186 0.72093023 0.79069767 0.51162791
|
|
0.72093023 0.65116279 0.62790698 0.72093023]
|
|
|
|
mean value: 0.6837209302325581
|
|
|
|
key: train_accuracy
|
|
value: [0.98449612 0.98191214 0.98449612 0.98966408 0.9870801 0.9870801
|
|
0.98966408 0.9870801 0.98449612 0.9870801 ]
|
|
|
|
mean value: 0.9863049095607235
|
|
|
|
key: test_fscore
|
|
value: [0.69767442 0.66666667 0.77272727 0.71428571 0.79069767 0.55319149
|
|
0.71428571 0.63414634 0.61904762 0.66666667]
|
|
|
|
mean value: 0.6829389577528027
|
|
|
|
key: train_fscore
|
|
value: [0.98429319 0.98172324 0.98429319 0.98958333 0.98694517 0.9870801
|
|
0.98969072 0.98701299 0.984375 0.98701299]
|
|
|
|
mean value: 0.9862009927113224
|
|
|
|
key: test_precision
|
|
value: [0.71428571 0.61538462 0.77272727 0.75 0.80952381 0.5
|
|
0.71428571 0.65 0.61904762 0.8 ]
|
|
|
|
mean value: 0.6945254745254745
|
|
|
|
key: train_precision
|
|
value: [0.99470899 0.98947368 0.99470899 0.9947644 0.99473684 0.98963731
|
|
0.98969072 0.9947644 0.99473684 0.9947644 ]
|
|
|
|
mean value: 0.9931986578905286
|
|
|
|
key: test_recall
|
|
value: [0.68181818 0.72727273 0.77272727 0.68181818 0.77272727 0.61904762
|
|
0.71428571 0.61904762 0.61904762 0.57142857]
|
|
|
|
mean value: 0.6779220779220779
|
|
|
|
key: train_recall
|
|
value: [0.97409326 0.97409326 0.97409326 0.98445596 0.97927461 0.98453608
|
|
0.98969072 0.97938144 0.9742268 0.97938144]
|
|
|
|
mean value: 0.9793226857539661
|
|
|
|
key: test_roc_auc
|
|
value: [0.69805195 0.62554113 0.76731602 0.72186147 0.79112554 0.51406926
|
|
0.72077922 0.6504329 0.62770563 0.71753247]
|
|
|
|
mean value: 0.6834415584415584
|
|
|
|
key: train_roc_auc
|
|
value: [0.98446931 0.98189199 0.98446931 0.98965066 0.98705999 0.98708669
|
|
0.98966401 0.98710005 0.98452273 0.98710005]
|
|
|
|
mean value: 0.986301479621815
|
|
|
|
key: test_jcc
|
|
value: [0.53571429 0.5 0.62962963 0.55555556 0.65384615 0.38235294
|
|
0.55555556 0.46428571 0.44827586 0.5 ]
|
|
|
|
mean value: 0.522521569783233
|
|
|
|
key: train_jcc
|
|
value: [0.96907216 0.96410256 0.96907216 0.97938144 0.9742268 0.9744898
|
|
0.97959184 0.97435897 0.96923077 0.97435897]
|
|
|
|
mean value: 0.9727885492023931
|
|
|
|
MCC on Blind test: 0.27
|
|
|
|
Accuracy on Blind test: 0.64
|
|
|
|
Model_name: Gradient Boosting
|
|
Model func: GradientBoostingClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GradientBoostingClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.80391073 0.82702971 0.87435555 0.93781734 0.8904283 0.84583449
|
|
0.83670449 0.85518622 0.98863912 0.9060173 ]
|
|
|
|
mean value: 0.8765923261642456
|
|
|
|
key: score_time
|
|
value: [0.01313496 0.01316261 0.01317477 0.01316571 0.01292896 0.01285672
|
|
0.02555585 0.0127821 0.02496505 0.0128355 ]
|
|
|
|
mean value: 0.015456223487854004
|
|
|
|
key: test_mcc
|
|
value: [0.63123793 0.44155844 0.81778934 0.55959928 0.86117339 0.58824786
|
|
0.9544491 0.76839827 0.53463203 0.86117339]
|
|
|
|
mean value: 0.7018259039875132
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 0.99484522
|
|
1. 1. 1. 0.99484522]
|
|
|
|
mean value: 0.9989690447011859
|
|
|
|
key: test_accuracy
|
|
value: [0.81395349 0.72093023 0.90697674 0.76744186 0.93023256 0.79069767
|
|
0.97674419 0.88372093 0.76744186 0.93023256]
|
|
|
|
mean value: 0.8488372093023255
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 0.99741602
|
|
1. 1. 1. 0.99741602]
|
|
|
|
mean value: 0.999483204134367
|
|
|
|
key: test_fscore
|
|
value: [0.80952381 0.72727273 0.9047619 0.73684211 0.93333333 0.8
|
|
0.97560976 0.88372093 0.76190476 0.92682927]
|
|
|
|
mean value: 0.8459798596682496
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 0.99742931
|
|
1. 1. 1. 0.99742931]
|
|
|
|
mean value: 0.9994858611825193
|
|
|
|
key: test_precision
|
|
value: [0.85 0.72727273 0.95 0.875 0.91304348 0.75
|
|
1. 0.86363636 0.76190476 0.95 ]
|
|
|
|
mean value: 0.8640857331074723
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 0.99487179
|
|
1. 1. 1. 0.99487179]
|
|
|
|
mean value: 0.9989743589743589
|
|
|
|
key: test_recall
|
|
value: [0.77272727 0.72727273 0.86363636 0.63636364 0.95454545 0.85714286
|
|
0.95238095 0.9047619 0.76190476 0.9047619 ]
|
|
|
|
mean value: 0.8335497835497836
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.81493506 0.72077922 0.90800866 0.77056277 0.92965368 0.79220779
|
|
0.97619048 0.88419913 0.76731602 0.92965368]
|
|
|
|
mean value: 0.8493506493506493
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 0.99740933
|
|
1. 1. 1. 0.99740933]
|
|
|
|
mean value: 0.9994818652849741
|
|
|
|
key: test_jcc
|
|
value: [0.68 0.57142857 0.82608696 0.58333333 0.875 0.66666667
|
|
0.95238095 0.79166667 0.61538462 0.86363636]
|
|
|
|
mean value: 0.7425584126018908
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 0.99487179
|
|
1. 1. 1. 0.99487179]
|
|
|
|
mean value: 0.9989743589743589
|
|
|
|
MCC on Blind test: 0.52
|
|
|
|
Accuracy on Blind test: 0.76
|
|
|
|
Model_name: QDA
|
|
Model func: QuadraticDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', QuadraticDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.08959627 0.09128714 0.10529256 0.07038856 0.07038212 0.07437181
|
|
0.12109137 0.10772467 0.06410241 0.20232749]
|
|
|
|
mean value: 0.09965643882751465
|
|
|
|
key: score_time
|
|
value: [0.02013779 0.02048373 0.02002215 0.02515244 0.02193284 0.03954339
|
|
0.03085089 0.02007246 0.01990652 0.01039696]
|
|
|
|
mean value: 0.0228499174118042
|
|
|
|
key: test_mcc
|
|
value: [0.58225108 0.32531323 0.20824344 0.16726499 0.59541363 0.06638793
|
|
0.30151915 0.42224772 0.50454827 0.35748709]
|
|
|
|
mean value: 0.35306765339248786
|
|
|
|
key: train_mcc
|
|
value: [0.81354434 0.73174697 0.94878037 0.60464608 0.89211899 0.91973714
|
|
0.92458182 0.9276481 0.92278309 0.63983041]
|
|
|
|
mean value: 0.8325417304390078
|
|
|
|
key: test_accuracy
|
|
value: [0.79069767 0.62790698 0.60465116 0.58139535 0.79069767 0.53488372
|
|
0.65116279 0.69767442 0.74418605 0.6744186 ]
|
|
|
|
mean value: 0.6697674418604651
|
|
|
|
key: train_accuracy
|
|
value: [0.89922481 0.8501292 0.97416021 0.76744186 0.94315245 0.95865633
|
|
0.96124031 0.96382429 0.96124031 0.79069767]
|
|
|
|
mean value: 0.9069767441860466
|
|
|
|
key: test_fscore
|
|
value: [0.79069767 0.72413793 0.62222222 0.65384615 0.81632653 0.47368421
|
|
0.63414634 0.73469388 0.76595745 0.69565217]
|
|
|
|
mean value: 0.6911364562396014
|
|
|
|
key: train_fscore
|
|
value: [0.90780142 0.86877828 0.9744898 0.81092437 0.94607843 0.95721925
|
|
0.96 0.96391753 0.96183206 0.82729211]
|
|
|
|
mean value: 0.9178333245074515
|
|
|
|
key: test_precision
|
|
value: [0.80952381 0.58333333 0.60869565 0.56666667 0.74074074 0.52941176
|
|
0.65 0.64285714 0.69230769 0.64 ]
|
|
|
|
mean value: 0.6463536802309181
|
|
|
|
key: train_precision
|
|
value: [0.83478261 0.77108434 0.95979899 0.6819788 0.89767442 0.99444444
|
|
0.99447514 0.96391753 0.94974874 0.70545455]
|
|
|
|
mean value: 0.8753359555723473
|
|
|
|
key: test_recall
|
|
value: [0.77272727 0.95454545 0.63636364 0.77272727 0.90909091 0.42857143
|
|
0.61904762 0.85714286 0.85714286 0.76190476]
|
|
|
|
mean value: 0.7569264069264069
|
|
|
|
key: train_recall
|
|
value: [0.99481865 0.99481865 0.98963731 1. 1. 0.92268041
|
|
0.92783505 0.96391753 0.9742268 1. ]
|
|
|
|
mean value: 0.9767934405213397
|
|
|
|
key: test_roc_auc
|
|
value: [0.79112554 0.62012987 0.6038961 0.57683983 0.78787879 0.53246753
|
|
0.6504329 0.7012987 0.74675325 0.67640693]
|
|
|
|
mean value: 0.6687229437229437
|
|
|
|
key: train_roc_auc
|
|
value: [0.89947118 0.85050211 0.9742001 0.76804124 0.94329897 0.95874953
|
|
0.96132685 0.96382405 0.96120667 0.79015544]
|
|
|
|
mean value: 0.9070776133753539
|
|
|
|
key: test_jcc
|
|
value: [0.65384615 0.56756757 0.4516129 0.48571429 0.68965517 0.31034483
|
|
0.46428571 0.58064516 0.62068966 0.53333333]
|
|
|
|
mean value: 0.5357694774435597
|
|
|
|
key: train_jcc
|
|
value: [0.83116883 0.768 0.95024876 0.6819788 0.89767442 0.91794872
|
|
0.92307692 0.93034826 0.92647059 0.70545455]
|
|
|
|
mean value: 0.8532369838000908
|
|
|
|
MCC on Blind test: 0.07
|
|
|
|
Accuracy on Blind test: 0.55
|
|
|
|
Model_name: Ridge Classifier
|
|
Model func: RidgeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01539373 0.01527166 0.01535821 0.04245615 0.0374465 0.03744864
|
|
0.03409624 0.03687501 0.03697228 0.03376293]
|
|
|
|
mean value: 0.030508136749267577
|
|
|
|
key: score_time
|
|
value: [0.01220465 0.01212001 0.01203203 0.01598191 0.02313375 0.02380562
|
|
0.0238564 0.02015162 0.0226481 0.02359152]
|
|
|
|
mean value: 0.018952560424804688
|
|
|
|
key: test_mcc
|
|
value: [0.44155844 0.20835137 0.49456394 0.67532468 0.62964308 0.20824344
|
|
0.76789769 0.51986413 0.3961039 0.63732414]
|
|
|
|
mean value: 0.49788747885318635
|
|
|
|
key: train_mcc
|
|
value: [0.75720506 0.75208381 0.74703465 0.75718561 0.74177263 0.78294429
|
|
0.75193633 0.74677493 0.75193633 0.75734102]
|
|
|
|
mean value: 0.7546214639709004
|
|
|
|
key: test_accuracy
|
|
value: [0.72093023 0.60465116 0.74418605 0.8372093 0.81395349 0.60465116
|
|
0.88372093 0.74418605 0.69767442 0.81395349]
|
|
|
|
mean value: 0.7465116279069768
|
|
|
|
key: train_accuracy
|
|
value: [0.87855297 0.87596899 0.87338501 0.87855297 0.87080103 0.89147287
|
|
0.87596899 0.87338501 0.87596899 0.87855297]
|
|
|
|
mean value: 0.8772609819121447
|
|
|
|
key: test_fscore
|
|
value: [0.72727273 0.63829787 0.73170732 0.8372093 0.82608696 0.58536585
|
|
0.87804878 0.7755102 0.69767442 0.78947368]
|
|
|
|
mean value: 0.7486647116576796
|
|
|
|
key: train_fscore
|
|
value: /home/tanu/git/LSHTM_analysis/scripts/ml/./pnca_cd_sl.py:156: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
ros_CT.sort_values(by = ['test_mcc'], ascending = False, inplace = True)
|
|
/home/tanu/git/LSHTM_analysis/scripts/ml/./pnca_cd_sl.py:159: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
ros_BT.sort_values(by = ['bts_mcc'], ascending = False, inplace = True)
|
|
[0.87917738 0.87434555 0.87468031 0.8772846 0.87179487 0.89175258
|
|
0.87628866 0.87403599 0.87628866 0.88040712]
|
|
|
|
mean value: 0.8776055712937129
|
|
|
|
key: test_precision
|
|
value: [0.72727273 0.6 0.78947368 0.85714286 0.79166667 0.6
|
|
0.9 0.67857143 0.68181818 0.88235294]
|
|
|
|
mean value: 0.7508298486858859
|
|
|
|
key: train_precision
|
|
value: [0.87244898 0.88359788 0.86363636 0.88421053 0.86294416 0.89175258
|
|
0.87628866 0.87179487 0.87628866 0.86934673]
|
|
|
|
mean value: 0.8752309417948851
|
|
|
|
key: test_recall
|
|
value: [0.72727273 0.68181818 0.68181818 0.81818182 0.86363636 0.57142857
|
|
0.85714286 0.9047619 0.71428571 0.71428571]
|
|
|
|
mean value: 0.7534632034632035
|
|
|
|
key: train_recall
|
|
value: [0.88601036 0.86528497 0.88601036 0.87046632 0.88082902 0.89175258
|
|
0.87628866 0.87628866 0.87628866 0.89175258]
|
|
|
|
mean value: 0.8800972170290049
|
|
|
|
key: test_roc_auc
|
|
value: [0.72077922 0.60281385 0.745671 0.83766234 0.81277056 0.6038961
|
|
0.88311688 0.7478355 0.69805195 0.81168831]
|
|
|
|
mean value: 0.7464285714285714
|
|
|
|
key: train_roc_auc
|
|
value: [0.87857219 0.87594146 0.87341755 0.87853213 0.87082688 0.89147214
|
|
0.87596816 0.87337749 0.87596816 0.87851878]
|
|
|
|
mean value: 0.877259494685113
|
|
|
|
key: test_jcc
|
|
value: [0.57142857 0.46875 0.57692308 0.72 0.7037037 0.4137931
|
|
0.7826087 0.63333333 0.53571429 0.65217391]
|
|
|
|
mean value: 0.6058428683246899
|
|
|
|
key: train_jcc
|
|
value: [0.78440367 0.77674419 0.77727273 0.78139535 0.77272727 0.80465116
|
|
0.77981651 0.77625571 0.77981651 0.78636364]
|
|
|
|
mean value: 0.7819446739048318
|
|
|
|
MCC on Blind test: 0.4
|
|
|
|
Accuracy on Blind test: 0.7
|
|
|
|
Model_name: Ridge ClassifierCV
|
|
Model func: RidgeClassifierCV(cv=10)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifierCV(cv=10))])
|
|
|
|
key: fit_time
|
|
value: [0.20661426 0.38068581 0.32723403 0.37064242 0.38206387 0.34253049
|
|
0.34622693 0.36653638 0.34563804 0.40613103]
|
|
|
|
mean value: 0.34743032455444334
|
|
|
|
key: score_time
|
|
value: [0.02376938 0.02099776 0.02022552 0.02396584 0.0239017 0.02375197
|
|
0.0239141 0.02399564 0.02379417 0.02396965]
|
|
|
|
mean value: 0.0232285737991333
|
|
|
|
key: test_mcc
|
|
value: [0.3961039 0.4912706 0.49456394 0.72451364 0.723327 0.39479486
|
|
0.81385281 0.51986413 0.26318068 0.63732414]
|
|
|
|
mean value: 0.5458795675605433
|
|
|
|
key: train_mcc
|
|
value: [0.68476577 0.72095943 0.74703465 0.68476577 0.68053636 0.70564037
|
|
0.6434123 0.74677493 0.68517152 0.69551524]
|
|
|
|
mean value: 0.6994576344704969
|
|
|
|
key: test_accuracy
|
|
value: [0.69767442 0.74418605 0.74418605 0.86046512 0.86046512 0.69767442
|
|
0.90697674 0.74418605 0.62790698 0.81395349]
|
|
|
|
mean value: 0.7697674418604651
|
|
|
|
key: train_accuracy
|
|
value: [0.84237726 0.86046512 0.87338501 0.84237726 0.83979328 0.85271318
|
|
0.82170543 0.87338501 0.84237726 0.84754522]
|
|
|
|
mean value: 0.8496124031007752
|
|
|
|
key: test_fscore
|
|
value: [0.69767442 0.76595745 0.73170732 0.85714286 0.86956522 0.68292683
|
|
0.9047619 0.7755102 0.65217391 0.78947368]
|
|
|
|
mean value: 0.7726893792386329
|
|
|
|
key: train_fscore
|
|
value: [0.84237726 0.859375 0.87468031 0.84237726 0.84343434 0.85496183
|
|
0.82262211 0.87403599 0.84556962 0.85063291]
|
|
|
|
mean value: 0.8510066633696552
|
|
|
|
key: test_precision
|
|
value: [0.71428571 0.72 0.78947368 0.9 0.83333333 0.7
|
|
0.9047619 0.67857143 0.6 0.88235294]
|
|
|
|
mean value: 0.7722779006339378
|
|
|
|
key: train_precision
|
|
value: [0.84020619 0.86387435 0.86363636 0.84020619 0.8226601 0.84422111
|
|
0.82051282 0.87179487 0.83084577 0.8358209 ]
|
|
|
|
mean value: 0.8433778643344287
|
|
|
|
key: test_recall
|
|
value: [0.68181818 0.81818182 0.68181818 0.81818182 0.90909091 0.66666667
|
|
0.9047619 0.9047619 0.71428571 0.71428571]
|
|
|
|
mean value: 0.7813852813852814
|
|
|
|
key: train_recall
|
|
value: [0.84455959 0.85492228 0.88601036 0.84455959 0.86528497 0.86597938
|
|
0.82474227 0.87628866 0.86082474 0.86597938]
|
|
|
|
mean value: 0.8589151220554457
|
|
|
|
key: test_roc_auc
|
|
value: [0.69805195 0.74242424 0.745671 0.86147186 0.85930736 0.6969697
|
|
0.90692641 0.7478355 0.62987013 0.81168831]
|
|
|
|
mean value: 0.770021645021645
|
|
|
|
key: train_roc_auc
|
|
value: [0.84238289 0.86045083 0.87341755 0.84238289 0.83985898 0.85267881
|
|
0.82169756 0.87337749 0.84232947 0.84749746]
|
|
|
|
mean value: 0.8496073927674803
|
|
|
|
key: test_jcc
|
|
value: [0.53571429 0.62068966 0.57692308 0.75 0.76923077 0.51851852
|
|
0.82608696 0.63333333 0.48387097 0.65217391]
|
|
|
|
mean value: 0.636654147619955
|
|
|
|
key: train_jcc
|
|
value: [0.72767857 0.75342466 0.77727273 0.72767857 0.72925764 0.74666667
|
|
0.69868996 0.77625571 0.73245614 0.74008811]
|
|
|
|
mean value: 0.7409468746424365
|
|
|
|
MCC on Blind test: 0.39
|
|
|
|
Accuracy on Blind test: 0.7
|
|
|
|
Model_name: Logistic Regression
|
|
Model func: LogisticRegression(random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegression(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.06141329 0.0454545 0.0357089 0.03677487 0.05627537 0.0584569
|
|
0.03606534 0.02807307 0.0350523 0.03440022]
|
|
|
|
mean value: 0.04276747703552246
|
|
|
|
key: score_time
|
|
value: [0.0240252 0.01387882 0.01382113 0.01195884 0.01689172 0.01412845
|
|
0.01413083 0.01187396 0.01419377 0.01422119]
|
|
|
|
mean value: 0.01491239070892334
|
|
|
|
key: test_mcc
|
|
value: [0.5007734 0.77777778 0.61059098 0.66229864 0.19802951 0.60130719
|
|
0.54754393 0.37340802 0.54458115 0.60130719]
|
|
|
|
mean value: 0.5417617785378652
|
|
|
|
key: train_mcc
|
|
value: [0.74779462 0.70891756 0.71609411 0.75395088 0.76030097 0.7350822
|
|
0.73501314 0.72244119 0.70982126 0.72885068]
|
|
|
|
mean value: 0.7318266584515561
|
|
|
|
key: test_accuracy
|
|
value: [0.75 0.88888889 0.8 0.82857143 0.6 0.8
|
|
0.77142857 0.68571429 0.77142857 0.8 ]
|
|
|
|
mean value: 0.7696031746031746
|
|
|
|
key: train_accuracy
|
|
value: [0.87341772 0.85443038 0.85804416 0.87697161 0.88012618 0.86750789
|
|
0.86750789 0.86119874 0.85488959 0.86435331]
|
|
|
|
mean value: 0.8658447470350996
|
|
|
|
key: test_fscore
|
|
value: [0.75675676 0.88888889 0.81081081 0.83333333 0.5625 0.8
|
|
0.76470588 0.71794872 0.78947368 0.8 ]
|
|
|
|
mean value: 0.7724418074301975
|
|
|
|
key: train_fscore
|
|
value: [0.87012987 0.85350318 0.85893417 0.87774295 0.88125 0.86708861
|
|
0.86708861 0.85987261 0.85350318 0.86520376]
|
|
|
|
mean value: 0.865431694395441
|
|
|
|
key: test_precision
|
|
value: [0.73684211 0.88888889 0.75 0.78947368 0.6 0.77777778
|
|
0.8125 0.66666667 0.75 0.82352941]
|
|
|
|
mean value: 0.7595678534571724
|
|
|
|
key: train_precision
|
|
value: [0.89333333 0.85897436 0.85625 0.875 0.8757764 0.87261146
|
|
0.86708861 0.86538462 0.85897436 0.85714286]
|
|
|
|
mean value: 0.8680535993888141
|
|
|
|
key: test_recall
|
|
value: [0.77777778 0.88888889 0.88235294 0.88235294 0.52941176 0.82352941
|
|
0.72222222 0.77777778 0.83333333 0.77777778]
|
|
|
|
mean value: 0.7895424836601307
|
|
|
|
key: train_recall
|
|
value: [0.84810127 0.84810127 0.86163522 0.88050314 0.88679245 0.86163522
|
|
0.86708861 0.85443038 0.84810127 0.87341772]
|
|
|
|
mean value: 0.8629806544064963
|
|
|
|
key: test_roc_auc
|
|
value: [0.75 0.88888889 0.80228758 0.83006536 0.59803922 0.80065359
|
|
0.77287582 0.68300654 0.76960784 0.80065359]
|
|
|
|
mean value: 0.7696078431372549
|
|
|
|
key: train_roc_auc
|
|
value: [0.87341772 0.85443038 0.8580328 0.87696043 0.88010509 0.86752647
|
|
0.86750657 0.86117745 0.85486824 0.86438182]
|
|
|
|
mean value: 0.8658406973967041
|
|
|
|
key: test_jcc
|
|
value: [0.60869565 0.8 0.68181818 0.71428571 0.39130435 0.66666667
|
|
0.61904762 0.56 0.65217391 0.66666667]
|
|
|
|
mean value: 0.6360658761528327
|
|
|
|
key: train_jcc
|
|
value: [0.77011494 0.74444444 0.75274725 0.78212291 0.7877095 0.76536313
|
|
0.76536313 0.75418994 0.74444444 0.76243094]
|
|
|
|
mean value: 0.7628930626743352
|
|
|
|
MCC on Blind test: 0.39
|
|
|
|
Accuracy on Blind test: 0.7
|
|
|
|
Model_name: Logistic RegressionCV
|
|
Model func: LogisticRegressionCV(random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegressionCV(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.96169853 1.06569672 0.93234968 1.18725181 0.91562414 0.9073813
|
|
1.05254364 1.10178089 1.07017517 1.11856723]
|
|
|
|
mean value: 1.0313069105148316
|
|
|
|
key: score_time
|
|
value: [0.01202536 0.01782393 0.01223016 0.02358055 0.0121913 0.012362
|
|
0.01212907 0.02116275 0.01963162 0.01449251]
|
|
|
|
mean value: 0.015762925148010254
|
|
|
|
key: test_mcc
|
|
value: [0.5007734 0.72333935 0.60130719 0.66229864 0.25671802 0.4869281
|
|
0.66009836 0.31354672 0.54458115 0.60130719]
|
|
|
|
mean value: 0.5350898118358979
|
|
|
|
key: train_mcc
|
|
value: [0.7215768 0.77862138 0.74143974 0.70984435 0.7350822 0.71609411
|
|
0.67853599 0.75395088 0.76032005 0.74788981]
|
|
|
|
mean value: 0.7343355311093412
|
|
|
|
key: test_accuracy
|
|
value: [0.75 0.86111111 0.8 0.82857143 0.62857143 0.74285714
|
|
0.82857143 0.65714286 0.77142857 0.8 ]
|
|
|
|
mean value: 0.7668253968253969
|
|
|
|
key: train_accuracy
|
|
value: [0.86075949 0.88924051 0.87066246 0.85488959 0.86750789 0.85804416
|
|
0.83911672 0.87697161 0.88012618 0.87381703]
|
|
|
|
mean value: 0.8671135646687698
|
|
|
|
key: test_fscore
|
|
value: [0.75675676 0.85714286 0.8 0.83333333 0.58064516 0.74285714
|
|
0.84210526 0.68421053 0.78947368 0.8 ]
|
|
|
|
mean value: 0.7686524725064623
|
|
|
|
key: train_fscore
|
|
value: [0.85987261 0.88817891 0.87227414 0.85443038 0.86708861 0.85893417
|
|
0.83601286 0.87619048 0.88050314 0.875 ]
|
|
|
|
mean value: 0.8668485307706836
|
|
|
|
key: test_precision
|
|
value: [0.73684211 0.88235294 0.77777778 0.78947368 0.64285714 0.72222222
|
|
0.8 0.65 0.75 0.82352941]
|
|
|
|
mean value: 0.7575055285272003
|
|
|
|
key: train_precision
|
|
value: [0.86538462 0.89677419 0.86419753 0.85987261 0.87261146 0.85625
|
|
0.8496732 0.87898089 0.875 0.86419753]
|
|
|
|
mean value: 0.8682942041428643
|
|
|
|
key: test_recall
|
|
value: [0.77777778 0.83333333 0.82352941 0.88235294 0.52941176 0.76470588
|
|
0.88888889 0.72222222 0.83333333 0.77777778]
|
|
|
|
mean value: 0.7833333333333333
|
|
|
|
key: train_recall
|
|
value: [0.85443038 0.87974684 0.88050314 0.8490566 0.86163522 0.86163522
|
|
0.82278481 0.87341772 0.88607595 0.88607595]
|
|
|
|
mean value: 0.8655361834248866
|
|
|
|
key: test_roc_auc
|
|
value: [0.75 0.86111111 0.80065359 0.83006536 0.62581699 0.74346405
|
|
0.82679739 0.65522876 0.76960784 0.80065359]
|
|
|
|
mean value: 0.7663398692810457
|
|
|
|
key: train_roc_auc
|
|
value: [0.86075949 0.88924051 0.87063132 0.85490805 0.86752647 0.8580328
|
|
0.83906536 0.87696043 0.88014489 0.87385558]
|
|
|
|
mean value: 0.8671124910437067
|
|
|
|
key: test_jcc
|
|
value: [0.60869565 0.75 0.66666667 0.71428571 0.40909091 0.59090909
|
|
0.72727273 0.52 0.65217391 0.66666667]
|
|
|
|
mean value: 0.6305761340109166
|
|
|
|
key: train_jcc
|
|
value: [0.75418994 0.79885057 0.77348066 0.74585635 0.76536313 0.75274725
|
|
0.71823204 0.77966102 0.78651685 0.77777778]
|
|
|
|
mean value: 0.765267560951859
|
|
|
|
MCC on Blind test: 0.4
|
|
|
|
Accuracy on Blind test: 0.7
|
|
|
|
Model_name: Gaussian NB
|
|
Model func: GaussianNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01380801 0.00991035 0.00968146 0.00964808 0.00931549 0.00928283
|
|
0.00930619 0.00949693 0.00936675 0.00932574]
|
|
|
|
mean value: 0.009914183616638183
|
|
|
|
key: score_time
|
|
value: [0.01205993 0.00925589 0.00926042 0.00894856 0.00876904 0.00881219
|
|
0.00875783 0.00880146 0.00878906 0.00873899]
|
|
|
|
mean value: 0.00921933650970459
|
|
|
|
key: test_mcc
|
|
value: [0.23570226 0.35355339 0.34908996 0.5289947 0.03300492 0.39285636
|
|
0.57348878 0.38195106 0.38195106 0.42397369]
|
|
|
|
mean value: 0.36545661866638957
|
|
|
|
key: train_mcc
|
|
value: [0.37958125 0.37616279 0.40833467 0.38788612 0.40373937 0.39377873
|
|
0.37666364 0.38474063 0.41515065 0.37598307]
|
|
|
|
mean value: 0.3902020926867929
|
|
|
|
key: test_accuracy
|
|
value: [0.61111111 0.66666667 0.6 0.71428571 0.51428571 0.65714286
|
|
0.77142857 0.68571429 0.68571429 0.68571429]
|
|
|
|
mean value: 0.6592063492063492
|
|
|
|
key: train_accuracy
|
|
value: [0.67405063 0.67088608 0.68769716 0.67192429 0.68769716 0.67823344
|
|
0.67192429 0.67507886 0.68454259 0.67507886]
|
|
|
|
mean value: 0.677711336501218
|
|
|
|
key: test_fscore
|
|
value: [0.66666667 0.71428571 0.70833333 0.77272727 0.54054054 0.72727273
|
|
0.80952381 0.73170732 0.73170732 0.75555556]
|
|
|
|
mean value: 0.7158320254051962
|
|
|
|
key: train_fscore
|
|
value: [0.72823219 0.72774869 0.74015748 0.73469388 0.73740053 0.7357513
|
|
0.72631579 0.72965879 0.74226804 0.72386059]
|
|
|
|
mean value: 0.7326087277953888
|
|
|
|
key: test_precision
|
|
value: [0.58333333 0.625 0.5483871 0.62962963 0.5 0.59259259
|
|
0.70833333 0.65217391 0.65217391 0.62962963]
|
|
|
|
mean value: 0.6121253441379668
|
|
|
|
key: train_precision
|
|
value: [0.62443439 0.62053571 0.63513514 0.61802575 0.63761468 0.62555066
|
|
0.62162162 0.62331839 0.62608696 0.62790698]
|
|
|
|
mean value: 0.6260230269863888
|
|
|
|
key: test_recall
|
|
value: [0.77777778 0.83333333 1. 1. 0.58823529 0.94117647
|
|
0.94444444 0.83333333 0.83333333 0.94444444]
|
|
|
|
mean value: 0.8696078431372549
|
|
|
|
key: train_recall
|
|
value: [0.87341772 0.87974684 0.88679245 0.90566038 0.87421384 0.89308176
|
|
0.87341772 0.87974684 0.91139241 0.85443038]
|
|
|
|
mean value: 0.8831900326407134
|
|
|
|
key: test_roc_auc
|
|
value: [0.61111111 0.66666667 0.61111111 0.72222222 0.51633987 0.66503268
|
|
0.76633987 0.68137255 0.68137255 0.67810458]
|
|
|
|
mean value: 0.6599673202614379
|
|
|
|
key: train_roc_auc
|
|
value: [0.67405063 0.67088608 0.68706711 0.67118462 0.68710692 0.67755354
|
|
0.67255792 0.67572247 0.68525595 0.67564286]
|
|
|
|
mean value: 0.6777028102858053
|
|
|
|
key: test_jcc
|
|
value: [0.5 0.55555556 0.5483871 0.62962963 0.37037037 0.57142857
|
|
0.68 0.57692308 0.57692308 0.60714286]
|
|
|
|
mean value: 0.5616360234747332
|
|
|
|
key: train_jcc
|
|
value: [0.57261411 0.57201646 0.5875 0.58064516 0.58403361 0.58196721
|
|
0.57024793 0.57438017 0.59016393 0.56722689]
|
|
|
|
mean value: 0.5780795480995707
|
|
|
|
MCC on Blind test: 0.4
|
|
|
|
Accuracy on Blind test: 0.7
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.00951147 0.00954366 0.00955582 0.00958872 0.00967336 0.00975108
|
|
0.00970745 0.00974202 0.00979972 0.00995803]
|
|
|
|
mean value: 0.00968313217163086
|
|
|
|
key: score_time
|
|
value: [0.00868154 0.00875759 0.00878048 0.00882292 0.00884938 0.00890732
|
|
0.00891852 0.00897455 0.00908208 0.00913715]
|
|
|
|
mean value: 0.008891153335571288
|
|
|
|
key: test_mcc
|
|
value: [0.16903085 0.5007734 0.7261082 0.56011203 0.14098436 0.21004201
|
|
0.71475794 0.31372549 0.25816993 0.37049379]
|
|
|
|
mean value: 0.39641980149576833
|
|
|
|
key: train_mcc
|
|
value: [0.50009015 0.48116688 0.48292914 0.50199282 0.51419131 0.52757592
|
|
0.48983547 0.48983547 0.49635204 0.49546107]
|
|
|
|
mean value: 0.4979430278221952
|
|
|
|
key: test_accuracy
|
|
value: [0.58333333 0.75 0.85714286 0.77142857 0.57142857 0.6
|
|
0.85714286 0.65714286 0.62857143 0.68571429]
|
|
|
|
mean value: 0.6961904761904761
|
|
|
|
key: train_accuracy
|
|
value: [0.75 0.74050633 0.74132492 0.75078864 0.75709779 0.76340694
|
|
0.7444795 0.7444795 0.74763407 0.74763407]
|
|
|
|
mean value: 0.7487351754981432
|
|
|
|
key: test_fscore
|
|
value: [0.54545455 0.74285714 0.86486486 0.78947368 0.54545455 0.63157895
|
|
0.86486486 0.66666667 0.62857143 0.7027027 ]
|
|
|
|
mean value: 0.6982489393015708
|
|
|
|
key: train_fscore
|
|
value: [0.7523511 0.73717949 0.74691358 0.75692308 0.75862069 0.7706422
|
|
0.75076923 0.75076923 0.75460123 0.75 ]
|
|
|
|
mean value: 0.7528769821550523
|
|
|
|
key: test_precision
|
|
value: [0.6 0.76470588 0.8 0.71428571 0.5625 0.57142857
|
|
0.84210526 0.66666667 0.64705882 0.68421053]
|
|
|
|
mean value: 0.6852961447736989
|
|
|
|
key: train_precision
|
|
value: [0.74534161 0.74675325 0.73333333 0.74096386 0.75625 0.75
|
|
0.73053892 0.73053892 0.73214286 0.74074074]
|
|
|
|
mean value: 0.7406603492610074
|
|
|
|
key: test_recall
|
|
value: [0.5 0.72222222 0.94117647 0.88235294 0.52941176 0.70588235
|
|
0.88888889 0.66666667 0.61111111 0.72222222]
|
|
|
|
mean value: 0.7169934640522876
|
|
|
|
key: train_recall
|
|
value: [0.75949367 0.7278481 0.76100629 0.77358491 0.76100629 0.79245283
|
|
0.7721519 0.7721519 0.77848101 0.75949367]
|
|
|
|
mean value: 0.7657670567629966
|
|
|
|
key: test_roc_auc
|
|
value: [0.58333333 0.75 0.85947712 0.7745098 0.57026144 0.60294118
|
|
0.85620915 0.65686275 0.62908497 0.68464052]
|
|
|
|
mean value: 0.6967320261437908
|
|
|
|
key: train_roc_auc
|
|
value: [0.75 0.74050633 0.74126264 0.7507165 0.75708542 0.76331502
|
|
0.74456652 0.74456652 0.74773107 0.74767136]
|
|
|
|
mean value: 0.74874213836478
|
|
|
|
key: test_jcc
|
|
value: [0.375 0.59090909 0.76190476 0.65217391 0.375 0.46153846
|
|
0.76190476 0.5 0.45833333 0.54166667]
|
|
|
|
mean value: 0.5478430989300554
|
|
|
|
key: train_jcc
|
|
value: [0.60301508 0.58375635 0.59605911 0.60891089 0.61111111 0.62686567
|
|
0.60098522 0.60098522 0.60591133 0.6 ]
|
|
|
|
mean value: 0.6037599981096068
|
|
|
|
MCC on Blind test: 0.48
|
|
|
|
Accuracy on Blind test: 0.73
|
|
|
|
Model_name: K-Nearest Neighbors
|
|
Model func: KNeighborsClassifier()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', KNeighborsClassifier())])
|
|
|
|
key: fit_time
|
|
value: [0.00937033 0.0111382 0.01077557 0.01083684 0.0093565 0.01025081
|
|
0.01027107 0.01103401 0.01049685 0.01045322]
|
|
|
|
mean value: 0.010398340225219727
|
|
|
|
key: score_time
|
|
value: [0.0183351 0.01748371 0.01819801 0.01933193 0.01758265 0.01811266
|
|
0.01768136 0.01772475 0.01783299 0.01692319]
|
|
|
|
mean value: 0.017920637130737306
|
|
|
|
key: test_mcc
|
|
value: [ 0.27820744 0.55555556 0.08852507 0.32673202 -0.09978902 0.44342203
|
|
0.42810458 0.31372549 -0.023338 0.50238608]
|
|
|
|
mean value: 0.28135312415157965
|
|
|
|
key: train_mcc
|
|
value: [0.50669459 0.47545552 0.53965039 0.50845145 0.53965039 0.50845145
|
|
0.45126865 0.5711897 0.54579834 0.48317837]
|
|
|
|
mean value: 0.5129788851827025
|
|
|
|
key: test_accuracy
|
|
value: [0.63888889 0.77777778 0.54285714 0.65714286 0.45714286 0.71428571
|
|
0.71428571 0.65714286 0.48571429 0.74285714]
|
|
|
|
mean value: 0.6388095238095238
|
|
|
|
key: train_accuracy
|
|
value: [0.75316456 0.73734177 0.76971609 0.75394322 0.76971609 0.75394322
|
|
0.72555205 0.78548896 0.77287066 0.74132492]
|
|
|
|
mean value: 0.7563061534161243
|
|
|
|
key: test_fscore
|
|
value: [0.62857143 0.77777778 0.55555556 0.68421053 0.34482759 0.73684211
|
|
0.72222222 0.66666667 0.4375 0.7804878 ]
|
|
|
|
mean value: 0.6334661673457543
|
|
|
|
key: train_fscore
|
|
value: [0.75776398 0.72964169 0.77399381 0.7607362 0.77399381 0.7607362
|
|
0.72025723 0.7875 0.77358491 0.73376623]
|
|
|
|
mean value: 0.7571974051856762
|
|
|
|
key: test_precision
|
|
value: [0.64705882 0.77777778 0.52631579 0.61904762 0.41666667 0.66666667
|
|
0.72222222 0.66666667 0.5 0.69565217]
|
|
|
|
mean value: 0.6238074405963758
|
|
|
|
key: train_precision
|
|
value: [0.74390244 0.75167785 0.76219512 0.74251497 0.76219512 0.74251497
|
|
0.73202614 0.77777778 0.76875 0.75333333]
|
|
|
|
mean value: 0.7536887730297543
|
|
|
|
key: test_recall
|
|
value: [0.61111111 0.77777778 0.58823529 0.76470588 0.29411765 0.82352941
|
|
0.72222222 0.66666667 0.38888889 0.88888889]
|
|
|
|
mean value: 0.6526143790849673
|
|
|
|
key: train_recall
|
|
value: [0.7721519 0.70886076 0.78616352 0.77987421 0.78616352 0.77987421
|
|
0.70886076 0.79746835 0.77848101 0.71518987]
|
|
|
|
mean value: 0.7613088129925961
|
|
|
|
key: test_roc_auc
|
|
value: [0.63888889 0.77777778 0.54411765 0.66013072 0.45261438 0.71732026
|
|
0.71405229 0.65686275 0.48856209 0.73856209]
|
|
|
|
mean value: 0.6388888888888888
|
|
|
|
key: train_roc_auc
|
|
value: [0.75316456 0.73734177 0.76966404 0.75386116 0.76966404 0.75386116
|
|
0.72549956 0.78552663 0.77288831 0.74124274]
|
|
|
|
mean value: 0.7562713955895232
|
|
|
|
key: test_jcc
|
|
value: [0.45833333 0.63636364 0.38461538 0.52 0.20833333 0.58333333
|
|
0.56521739 0.5 0.28 0.64 ]
|
|
|
|
mean value: 0.4776196412283369
|
|
|
|
key: train_jcc
|
|
value: [0.61 0.57435897 0.63131313 0.61386139 0.63131313 0.61386139
|
|
0.56281407 0.64948454 0.63076923 0.57948718]
|
|
|
|
mean value: 0.6097263025953108
|
|
|
|
MCC on Blind test: 0.02
|
|
|
|
Accuracy on Blind test: 0.52
|
|
|
|
Model_name: SVM
|
|
Model func: SVC(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SVC(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01542187 0.01616073 0.01565242 0.01571083 0.01529074 0.01559758
|
|
0.01642108 0.01565695 0.01558185 0.01572609]
|
|
|
|
mean value: 0.015722012519836424
|
|
|
|
key: score_time
|
|
value: [0.0105958 0.01050568 0.0106566 0.01063514 0.01046419 0.0108273
|
|
0.01060271 0.01053166 0.01047659 0.01044893]
|
|
|
|
mean value: 0.01057446002960205
|
|
|
|
key: test_mcc
|
|
value: [0.4472136 0.66666667 0.5104265 0.7261082 0.14002801 0.5104265
|
|
0.71568627 0.42810458 0.42810458 0.54248366]
|
|
|
|
mean value: 0.511524856256581
|
|
|
|
key: train_mcc
|
|
value: [0.70908803 0.65907322 0.71619687 0.69720133 0.72244119 0.68473245
|
|
0.69085626 0.70357543 0.70348698 0.70361082]
|
|
|
|
mean value: 0.6990262571392772
|
|
|
|
key: test_accuracy
|
|
value: [0.72222222 0.83333333 0.74285714 0.85714286 0.57142857 0.74285714
|
|
0.85714286 0.71428571 0.71428571 0.77142857]
|
|
|
|
mean value: 0.7526984126984126
|
|
|
|
key: train_accuracy
|
|
value: [0.85443038 0.82911392 0.85804416 0.84858044 0.86119874 0.84227129
|
|
0.84542587 0.85173502 0.85173502 0.85173502]
|
|
|
|
mean value: 0.8494269855847941
|
|
|
|
key: test_fscore
|
|
value: [0.73684211 0.83333333 0.76923077 0.86486486 0.51612903 0.76923077
|
|
0.85714286 0.72222222 0.72222222 0.77777778]
|
|
|
|
mean value: 0.7568995953546038
|
|
|
|
key: train_fscore
|
|
value: [0.8525641 0.82467532 0.85981308 0.85 0.8625 0.8447205
|
|
0.84444444 0.84984026 0.85173502 0.85266458]
|
|
|
|
mean value: 0.8492957300856865
|
|
|
|
key: test_precision
|
|
value: [0.7 0.83333333 0.68181818 0.8 0.57142857 0.68181818
|
|
0.88235294 0.72222222 0.72222222 0.77777778]
|
|
|
|
mean value: 0.7372973431796961
|
|
|
|
key: train_precision
|
|
value: [0.86363636 0.84666667 0.85185185 0.8447205 0.85714286 0.83435583
|
|
0.84713376 0.85806452 0.8490566 0.8447205 ]
|
|
|
|
mean value: 0.8497349439171819
|
|
|
|
key: test_recall
|
|
value: [0.77777778 0.83333333 0.88235294 0.94117647 0.47058824 0.88235294
|
|
0.83333333 0.72222222 0.72222222 0.77777778]
|
|
|
|
mean value: 0.7843137254901961
|
|
|
|
key: train_recall
|
|
value: [0.84177215 0.80379747 0.86792453 0.85534591 0.86792453 0.85534591
|
|
0.84177215 0.84177215 0.85443038 0.86075949]
|
|
|
|
mean value: 0.84908446779715
|
|
|
|
key: test_roc_auc
|
|
value: [0.72222222 0.83333333 0.74673203 0.85947712 0.56862745 0.74673203
|
|
0.85784314 0.71405229 0.71405229 0.77124183]
|
|
|
|
mean value: 0.7534313725490196
|
|
|
|
key: train_roc_auc
|
|
value: [0.85443038 0.82911392 0.8580129 0.84855903 0.86117745 0.84222992
|
|
0.84541438 0.85170369 0.85174349 0.85176339]
|
|
|
|
mean value: 0.849414855505135
|
|
|
|
key: test_jcc
|
|
value: [0.58333333 0.71428571 0.625 0.76190476 0.34782609 0.625
|
|
0.75 0.56521739 0.56521739 0.63636364]
|
|
|
|
mean value: 0.6174148315452663
|
|
|
|
key: train_jcc
|
|
value: [0.74301676 0.70165746 0.75409836 0.73913043 0.75824176 0.7311828
|
|
0.73076923 0.73888889 0.74175824 0.7431694 ]
|
|
|
|
mean value: 0.7381913328042566
|
|
|
|
MCC on Blind test: 0.27
|
|
|
|
Accuracy on Blind test: 0.64
|
|
|
|
Model_name: MLP
|
|
Model func: MLPClassifier(max_iter=500, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MLPClassifier(max_iter=500, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.67205691 1.74800611 1.71937871 1.71159053 1.55831695 1.38682938
|
|
1.66299558 1.92333603 1.57990313 1.74328637]
|
|
|
|
mean value: 1.6705699682235717
|
|
|
|
key: score_time
|
|
value: [0.01249242 0.01816964 0.01529384 0.01299953 0.01308084 0.01252365
|
|
0.01541805 0.01910162 0.01590967 0.01836467]
|
|
|
|
mean value: 0.015335392951965333
|
|
|
|
key: test_mcc
|
|
value: [0.50709255 0.77777778 0.54754393 0.61059098 0.20406349 0.66229864
|
|
0.4869281 0.43605973 0.42810458 0.65686275]
|
|
|
|
mean value: 0.5317322530882816
|
|
|
|
key: train_mcc
|
|
value: [0.94320801 0.96837383 0.96847259 0.98738158 0.96847385 0.98738158
|
|
0.96222284 0.97476316 0.98746069 0.98109152]
|
|
|
|
mean value: 0.9728829641992514
|
|
|
|
key: test_accuracy
|
|
value: [0.75 0.88888889 0.77142857 0.8 0.6 0.82857143
|
|
0.74285714 0.71428571 0.71428571 0.82857143]
|
|
|
|
mean value: 0.763888888888889
|
|
|
|
key: train_accuracy
|
|
value: [0.97151899 0.98417722 0.98422713 0.99369085 0.98422713 0.99369085
|
|
0.98107256 0.9873817 0.99369085 0.99053628]
|
|
|
|
mean value: 0.9864213552689374
|
|
|
|
key: test_fscore
|
|
value: [0.72727273 0.88888889 0.77777778 0.81081081 0.5 0.83333333
|
|
0.74285714 0.75 0.72222222 0.83333333]
|
|
|
|
mean value: 0.7586496236496236
|
|
|
|
key: train_fscore
|
|
value: [0.97178683 0.98412698 0.98432602 0.99371069 0.98422713 0.99371069
|
|
0.98113208 0.98734177 0.99371069 0.99047619]
|
|
|
|
mean value: 0.9864549079700586
|
|
|
|
key: test_precision
|
|
value: [0.8 0.88888889 0.73684211 0.75 0.63636364 0.78947368
|
|
0.76470588 0.68181818 0.72222222 0.83333333]
|
|
|
|
mean value: 0.7603647934452888
|
|
|
|
key: train_precision
|
|
value: [0.96273292 0.98726115 0.98125 0.99371069 0.98734177 0.99371069
|
|
0.975 0.98734177 0.9875 0.99363057]
|
|
|
|
mean value: 0.9849479566951478
|
|
|
|
key: test_recall
|
|
value: [0.66666667 0.88888889 0.82352941 0.88235294 0.41176471 0.88235294
|
|
0.72222222 0.83333333 0.72222222 0.83333333]
|
|
|
|
mean value: 0.7666666666666666
|
|
|
|
key: train_recall
|
|
value: [0.98101266 0.98101266 0.98742138 0.99371069 0.98113208 0.99371069
|
|
0.98734177 0.98734177 1. 0.98734177]
|
|
|
|
mean value: 0.9880025475678689
|
|
|
|
key: test_roc_auc
|
|
value: [0.75 0.88888889 0.77287582 0.80228758 0.59477124 0.83006536
|
|
0.74346405 0.71078431 0.71405229 0.82843137]
|
|
|
|
mean value: 0.7635620915032679
|
|
|
|
key: train_roc_auc
|
|
value: [0.97151899 0.98417722 0.98421702 0.99369079 0.98423692 0.99369079
|
|
0.98109227 0.98738158 0.99371069 0.99052623]
|
|
|
|
mean value: 0.9864242496616511
|
|
|
|
key: test_jcc
|
|
value: [0.57142857 0.8 0.63636364 0.68181818 0.33333333 0.71428571
|
|
0.59090909 0.6 0.56521739 0.71428571]
|
|
|
|
mean value: 0.620764163372859
|
|
|
|
key: train_jcc
|
|
value: [0.94512195 0.96875 0.9691358 0.9875 0.9689441 0.9875
|
|
0.96296296 0.975 0.9875 0.98113208]
|
|
|
|
mean value: 0.9733546891502192
|
|
|
|
MCC on Blind test: 0.17
|
|
|
|
Accuracy on Blind test: 0.58
|
|
|
|
Model_name: Decision Tree
|
|
Model func: DecisionTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', DecisionTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.03031135 0.02292132 0.02027917 0.02237177 0.02224588 0.02189922
|
|
0.02142787 0.02358723 0.01928711 0.02066112]
|
|
|
|
mean value: 0.0224992036819458
|
|
|
|
key: score_time
|
|
value: [0.01231074 0.00908279 0.00853205 0.00865054 0.00881147 0.00856233
|
|
0.00856781 0.00854087 0.0085125 0.00855494]
|
|
|
|
mean value: 0.009012603759765625
|
|
|
|
key: test_mcc
|
|
value: [0.55555556 0.68376346 0.67680204 0.54458115 0.48524851 0.62873728
|
|
0.5815291 0.38195106 0.50238608 0.37340802]
|
|
|
|
mean value: 0.5413962266125969
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.77777778 0.83333333 0.82857143 0.77142857 0.74285714 0.8
|
|
0.77142857 0.68571429 0.74285714 0.68571429]
|
|
|
|
mean value: 0.763968253968254
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.77777778 0.8125 0.84210526 0.75 0.72727273 0.82051282
|
|
0.73333333 0.73170732 0.7804878 0.71794872]
|
|
|
|
mean value: 0.7693645761954492
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.77777778 0.92857143 0.76190476 0.8 0.75 0.72727273
|
|
0.91666667 0.65217391 0.69565217 0.66666667]
|
|
|
|
mean value: 0.7676686115816551
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.77777778 0.72222222 0.94117647 0.70588235 0.70588235 0.94117647
|
|
0.61111111 0.83333333 0.88888889 0.77777778]
|
|
|
|
mean value: 0.7905228758169934
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.77777778 0.83333333 0.83169935 0.76960784 0.74183007 0.80392157
|
|
0.77614379 0.68137255 0.73856209 0.68300654]
|
|
|
|
mean value: 0.7637254901960785
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.63636364 0.68421053 0.72727273 0.6 0.57142857 0.69565217
|
|
0.57894737 0.57692308 0.64 0.56 ]
|
|
|
|
mean value: 0.6270798080637897
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.4
|
|
|
|
Accuracy on Blind test: 0.7
|
|
|
|
Model_name: Extra Trees
|
|
Model func: ExtraTreesClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreesClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.10861707 0.10948396 0.10891843 0.11251521 0.10827303 0.10929155
|
|
0.10902286 0.10885835 0.10935378 0.10915399]
|
|
|
|
mean value: 0.10934882164001465
|
|
|
|
key: score_time
|
|
value: [0.01715016 0.01729298 0.01747084 0.01766753 0.01737213 0.01722693
|
|
0.01735616 0.01723003 0.01750684 0.01739144]
|
|
|
|
mean value: 0.017366504669189452
|
|
|
|
key: test_mcc
|
|
value: [0.23570226 0.66666667 0.37955656 0.67680204 0.31774895 0.3180345
|
|
0.4869281 0.42906394 0.19934641 0.48524851]
|
|
|
|
mean value: 0.41950979267853006
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.61111111 0.83333333 0.68571429 0.82857143 0.65714286 0.65714286
|
|
0.74285714 0.71428571 0.6 0.74285714]
|
|
|
|
mean value: 0.7073015873015873
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.53333333 0.83333333 0.7027027 0.84210526 0.6 0.66666667
|
|
0.74285714 0.73684211 0.61111111 0.75675676]
|
|
|
|
mean value: 0.7025708415182099
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.66666667 0.83333333 0.65 0.76190476 0.69230769 0.63157895
|
|
0.76470588 0.7 0.61111111 0.73684211]
|
|
|
|
mean value: 0.7048450500308086
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.44444444 0.83333333 0.76470588 0.94117647 0.52941176 0.70588235
|
|
0.72222222 0.77777778 0.61111111 0.77777778]
|
|
|
|
mean value: 0.7107843137254902
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.61111111 0.83333333 0.6879085 0.83169935 0.65359477 0.65849673
|
|
0.74346405 0.7124183 0.5996732 0.74183007]
|
|
|
|
mean value: 0.7073529411764706
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.36363636 0.71428571 0.54166667 0.72727273 0.42857143 0.5
|
|
0.59090909 0.58333333 0.44 0.60869565]
|
|
|
|
mean value: 0.5498370976849238
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.28
|
|
|
|
Accuracy on Blind test: 0.64
|
|
|
|
Model_name: Extra Tree
|
|
Model func: ExtraTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01006556 0.00981927 0.00941396 0.00939035 0.00940633 0.0093112
|
|
0.00944066 0.00934458 0.009516 0.00944734]
|
|
|
|
mean value: 0.009515523910522461
|
|
|
|
key: score_time
|
|
value: [0.00883698 0.00861788 0.00850415 0.0085454 0.00852942 0.00849962
|
|
0.00850987 0.00850892 0.00852394 0.00889754]
|
|
|
|
mean value: 0.008597373962402344
|
|
|
|
key: test_mcc
|
|
value: [-0.1118034 0.56980288 0.15549417 0.4869281 -0.26403934 0.31354672
|
|
0.14852213 0.25573908 -0.023338 0.37254902]
|
|
|
|
mean value: 0.19034013612049247
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.44444444 0.77777778 0.57142857 0.74285714 0.37142857 0.65714286
|
|
0.57142857 0.62857143 0.48571429 0.68571429]
|
|
|
|
mean value: 0.5936507936507937
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.47368421 0.8 0.61538462 0.74285714 0.3125 0.625
|
|
0.54545455 0.64864865 0.4375 0.68571429]
|
|
|
|
mean value: 0.5886743448585554
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.45 0.72727273 0.54545455 0.72222222 0.33333333 0.66666667
|
|
0.6 0.63157895 0.5 0.70588235]
|
|
|
|
mean value: 0.5882410795259092
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.5 0.88888889 0.70588235 0.76470588 0.29411765 0.58823529
|
|
0.5 0.66666667 0.38888889 0.66666667]
|
|
|
|
mean value: 0.5964052287581699
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.44444444 0.77777778 0.5751634 0.74346405 0.36928105 0.65522876
|
|
0.57352941 0.62745098 0.48856209 0.68627451]
|
|
|
|
mean value: 0.5941176470588235
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.31034483 0.66666667 0.44444444 0.59090909 0.18518519 0.45454545
|
|
0.375 0.48 0.28 0.52173913]
|
|
|
|
mean value: 0.4308834799771831
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.08
|
|
|
|
Accuracy on Blind test: 0.55
|
|
|
|
Model_name: Random Forest
|
|
Model func: RandomForestClassifier(n_estimators=1000, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(n_estimators=1000, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.51522064 1.57600856 1.55131364 1.6932795 1.68040347 1.81337404
|
|
1.73380113 1.72802973 1.81885767 1.71407819]
|
|
|
|
mean value: 1.6824366569519043
|
|
|
|
key: score_time
|
|
value: [0.08909416 0.08941221 0.0988338 0.09735966 0.1245513 0.09853697
|
|
0.10165143 0.09904146 0.13339996 0.09609699]
|
|
|
|
mean value: 0.10279779434204102
|
|
|
|
key: test_mcc
|
|
value: [0.38949042 0.61205637 0.4869281 0.65686275 0.19802951 0.54248366
|
|
0.54754393 0.37049379 0.37340802 0.54248366]
|
|
|
|
mean value: 0.4719780213530567
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.69444444 0.80555556 0.74285714 0.82857143 0.6 0.77142857
|
|
0.77142857 0.68571429 0.68571429 0.77142857]
|
|
|
|
mean value: 0.7357142857142858
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.68571429 0.8 0.74285714 0.82352941 0.5625 0.76470588
|
|
0.76470588 0.7027027 0.71794872 0.77777778]
|
|
|
|
mean value: 0.7342441803471215
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.70588235 0.82352941 0.72222222 0.82352941 0.6 0.76470588
|
|
0.8125 0.68421053 0.66666667 0.77777778]
|
|
|
|
mean value: 0.7381024251805985
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.66666667 0.77777778 0.76470588 0.82352941 0.52941176 0.76470588
|
|
0.72222222 0.72222222 0.77777778 0.77777778]
|
|
|
|
mean value: 0.7326797385620915
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.69444444 0.80555556 0.74346405 0.82843137 0.59803922 0.77124183
|
|
0.77287582 0.68464052 0.68300654 0.77124183]
|
|
|
|
mean value: 0.7352941176470588
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.52173913 0.66666667 0.59090909 0.7 0.39130435 0.61904762
|
|
0.61904762 0.54166667 0.56 0.63636364]
|
|
|
|
mean value: 0.5846744776962168
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.45
|
|
|
|
Accuracy on Blind test: 0.73
|
|
|
|
Model_name: Random Forest2
|
|
Model func: RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'Z...05', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10,
|
|
oob_score=True, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.94859123 0.94930005 1.05498004 1.21180391 1.18700242 1.12215519
|
|
1.13519335 1.14038348 1.32968426 1.12452793]
|
|
|
|
mean value: 1.1203621864318847
|
|
|
|
key: score_time
|
|
value: [0.19991851 0.142133 0.15047169 0.15805769 0.15392733 0.15069294
|
|
0.15002155 0.16039896 0.16268349 0.15586495]
|
|
|
|
mean value: 0.158417010307312
|
|
|
|
key: test_mcc
|
|
value: [0.4472136 0.67082039 0.56011203 0.65686275 0.0825123 0.54248366
|
|
0.7261082 0.42906394 0.66009836 0.60000322]
|
|
|
|
mean value: 0.5375278443260683
|
|
|
|
key: train_mcc
|
|
value: [0.8734877 0.87341772 0.88014012 0.89276332 0.8864342 0.86119736
|
|
0.88014012 0.87389037 0.88014489 0.87381578]
|
|
|
|
mean value: 0.8775431586883564
|
|
|
|
key: test_accuracy
|
|
value: [0.72222222 0.83333333 0.77142857 0.82857143 0.54285714 0.77142857
|
|
0.85714286 0.71428571 0.82857143 0.8 ]
|
|
|
|
mean value: 0.766984126984127
|
|
|
|
key: train_accuracy
|
|
value: [0.93670886 0.93670886 0.94006309 0.94637224 0.94321767 0.93059937
|
|
0.94006309 0.93690852 0.94006309 0.93690852]
|
|
|
|
mean value: 0.9387613305115202
|
|
|
|
key: test_fscore
|
|
value: [0.73684211 0.82352941 0.78947368 0.82352941 0.5 0.76470588
|
|
0.84848485 0.73684211 0.84210526 0.81081081]
|
|
|
|
mean value: 0.7676323523072749
|
|
|
|
key: train_fscore
|
|
value: [0.93630573 0.93670886 0.94043887 0.94637224 0.94339623 0.93081761
|
|
0.93968254 0.93710692 0.94006309 0.93670886]
|
|
|
|
mean value: 0.9387600951106223
|
|
|
|
key: test_precision
|
|
value: [0.7 0.875 0.71428571 0.82352941 0.53333333 0.76470588
|
|
0.93333333 0.7 0.8 0.78947368]
|
|
|
|
mean value: 0.7633661359280555
|
|
|
|
key:/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
train_precision
|
|
value: [0.94230769 0.93670886 0.9375 0.94936709 0.94339623 0.93081761
|
|
0.94267516 0.93125 0.93710692 0.93670886]
|
|
|
|
mean value: 0.9387838416386924
|
|
|
|
key: test_recall
|
|
value: [0.77777778 0.77777778 0.88235294 0.82352941 0.47058824 0.76470588
|
|
0.77777778 0.77777778 0.88888889 0.83333333]
|
|
|
|
mean value: 0.7774509803921569
|
|
|
|
key: train_recall
|
|
value: [0.93037975 0.93670886 0.94339623 0.94339623 0.94339623 0.93081761
|
|
0.93670886 0.94303797 0.94303797 0.93670886]
|
|
|
|
mean value: 0.9387588567789189
|
|
|
|
key: test_roc_auc
|
|
value: [0.72222222 0.83333333 0.7745098 0.82843137 0.54084967 0.77124183
|
|
0.85947712 0.7124183 0.82679739 0.79901961]
|
|
|
|
mean value: 0.7668300653594771
|
|
|
|
key: train_roc_auc
|
|
value: [0.93670886 0.93670886 0.94005254 0.94638166 0.9432171 0.93059868
|
|
0.94005254 0.93692779 0.94007245 0.93690789]
|
|
|
|
mean value: 0.9387628373537139
|
|
|
|
key: test_jcc
|
|
value: [0.58333333 0.7 0.65217391 0.7 0.33333333 0.61904762
|
|
0.73684211 0.58333333 0.72727273 0.68181818]
|
|
|
|
mean value: 0.6317154546445164
|
|
|
|
key: train_jcc
|
|
value: [0.88023952 0.88095238 0.88757396 0.89820359 0.89285714 0.87058824
|
|
0.88622754 0.8816568 0.88690476 0.88095238]
|
|
|
|
mean value: 0.8846156329874189
|
|
|
|
MCC on Blind test: 0.51
|
|
|
|
Accuracy on Blind test: 0.76
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01165247 0.01152277 0.01164889 0.01150632 0.01147509 0.01155019
|
|
0.01157808 0.01161909 0.01158905 0.0117197 ]
|
|
|
|
mean value: 0.011586165428161621
|
|
|
|
key: score_time
|
|
value: [0.01032376 0.01035714 0.01048326 0.01028204 0.01026344 0.01024675
|
|
0.01024652 0.01032281 0.0105052 0.01020026]
|
|
|
|
mean value: 0.010323119163513184
|
|
|
|
key: test_mcc
|
|
value: [0.16903085 0.5007734 0.7261082 0.56011203 0.14098436 0.21004201
|
|
0.71475794 0.31372549 0.25816993 0.37049379]
|
|
|
|
mean value: 0.39641980149576833
|
|
|
|
key: train_mcc
|
|
value: [0.50009015 0.48116688 0.48292914 0.50199282 0.51419131 0.52757592
|
|
0.48983547 0.48983547 0.49635204 0.49546107]
|
|
|
|
mean value: 0.4979430278221952
|
|
|
|
key: test_accuracy
|
|
value: [0.58333333 0.75 0.85714286 0.77142857 0.57142857 0.6
|
|
0.85714286 0.65714286 0.62857143 0.68571429]
|
|
|
|
mean value: 0.6961904761904761
|
|
|
|
key: train_accuracy
|
|
value: [0.75 0.74050633 0.74132492 0.75078864 0.75709779 0.76340694
|
|
0.7444795 0.7444795 0.74763407 0.74763407]
|
|
|
|
mean value: 0.7487351754981432
|
|
|
|
key: test_fscore
|
|
value: [0.54545455 0.74285714 0.86486486 0.78947368 0.54545455 0.63157895
|
|
0.86486486 0.66666667 0.62857143 0.7027027 ]
|
|
|
|
mean value: 0.6982489393015708
|
|
|
|
key: train_fscore
|
|
value: [0.7523511 0.73717949 0.74691358 0.75692308 0.75862069 0.7706422
|
|
0.75076923 0.75076923 0.75460123 0.75 ]
|
|
|
|
mean value: 0.7528769821550523
|
|
|
|
key: test_precision
|
|
value: [0.6 0.76470588 0.8 0.71428571 0.5625 0.57142857
|
|
0.84210526 0.66666667 0.64705882 0.68421053]
|
|
|
|
mean value: 0.6852961447736989
|
|
|
|
key: train_precision
|
|
value: [0.74534161 0.74675325 0.73333333 0.74096386 0.75625 0.75
|
|
0.73053892 0.73053892 0.73214286 0.74074074]
|
|
|
|
mean value: 0.7406603492610074
|
|
|
|
key: test_recall
|
|
value: [0.5 0.72222222 0.94117647 0.88235294 0.52941176 0.70588235
|
|
0.88888889 0.66666667 0.61111111 0.72222222]
|
|
|
|
mean value: 0.7169934640522876
|
|
|
|
key: train_recall
|
|
value: [0.75949367 0.7278481 0.76100629 0.77358491 0.76100629 0.79245283
|
|
0.7721519 0.7721519 0.77848101 0.75949367]
|
|
|
|
mean value: 0.7657670567629966
|
|
|
|
key: test_roc_auc
|
|
value: [0.58333333 0.75 0.85947712 0.7745098 0.57026144 0.60294118
|
|
0.85620915 0.65686275 0.62908497 0.68464052]
|
|
|
|
mean value: 0.6967320261437908
|
|
|
|
key: train_roc_auc
|
|
value: [0.75 0.74050633 0.74126264 0.7507165 0.75708542 0.76331502
|
|
0.74456652 0.74456652 0.74773107 0.74767136]
|
|
|
|
mean value: 0.74874213836478
|
|
|
|
key: test_jcc
|
|
value: [0.375 0.59090909 0.76190476 0.65217391 0.375 0.46153846
|
|
0.76190476 0.5 0.45833333 0.54166667]
|
|
|
|
mean value: 0.5478430989300554
|
|
|
|
key: train_jcc
|
|
value: [0.60301508 0.58375635 0.59605911 0.60891089 0.61111111 0.62686567
|
|
0.60098522 0.60098522 0.60591133 0.6 ]
|
|
|
|
mean value: 0.6037599981096068
|
|
|
|
MCC on Blind test: 0.48
|
|
|
|
Accuracy on Blind test: 0.73
|
|
|
|
Model_name: XGBoost
|
|
Model func: XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'Z...
|
|
interaction_constraints=None, learning_rate=None,
|
|
max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan,
|
|
monotone_constraints=None, n_estimators=100,
|
|
n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None,
|
|
reg_lambda=None, scale_pos_weight=None,
|
|
subsample=None, tree_method=None,
|
|
use_label_encoder=False,
|
|
validate_parameters=None, verbosity=0))])
|
|
|
|
key: fit_time
|
|
value: [1.83040285 0.68204451 0.69062304 1.52602482 4.06419945 2.20627093
|
|
1.37749624 2.01253963 6.30247664 2.33498812]
|
|
|
|
mean value: 2.3027066230773925
|
|
|
|
key: score_time
|
|
value: [0.01224995 0.01245737 0.01228404 0.02493405 0.01278758 0.01422763
|
|
0.01192284 0.05001879 0.01314044 0.01353979]
|
|
|
|
mean value: 0.01775624752044678
|
|
|
|
key: test_mcc
|
|
value: [0.61977979 0.68376346 0.72347804 0.54458115 0.42810458 0.66229864
|
|
0.67680204 0.42810458 0.7261082 0.66229864]
|
|
|
|
mean value: 0.6155319110264079
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.80555556 0.83333333 0.85714286 0.77142857 0.71428571 0.82857143
|
|
0.82857143 0.71428571 0.85714286 0.82857143]
|
|
|
|
mean value: 0.8038888888888889
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.82051282 0.8125 0.83870968 0.75 0.70588235 0.83333333
|
|
0.8125 0.72222222 0.84848485 0.82352941]
|
|
|
|
mean value: 0.7967674666678463
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.76190476 0.92857143 0.92857143 0.8 0.70588235 0.78947368
|
|
0.92857143 0.72222222 0.93333333 0.875 ]
|
|
|
|
mean value: 0.8373530640326307
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.88888889 0.72222222 0.76470588 0.70588235 0.70588235 0.88235294
|
|
0.72222222 0.72222222 0.77777778 0.77777778]
|
|
|
|
mean value: 0.7669934640522875
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.80555556 0.83333333 0.85457516 0.76960784 0.71405229 0.83006536
|
|
0.83169935 0.71405229 0.85947712 0.83006536]
|
|
|
|
mean value: 0.8042483660130719
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.69565217 0.68421053 0.72222222 0.6 0.54545455 0.71428571
|
|
0.68421053 0.56521739 0.73684211 0.7 ]
|
|
|
|
mean value: 0.664809520507461
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.63
|
|
|
|
Accuracy on Blind test: 0.82
|
|
|
|
Model_name: LDA
|
|
Model func: LinearDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LinearDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.05960512 0.07252216 0.08704185 0.09135747 0.08128071 0.08341789
|
|
0.07867599 0.08321714 0.08362532 0.11062169]
|
|
|
|
mean value: 0.08313653469085694
|
|
|
|
key: score_time
|
|
value: [0.02530026 0.02149534 0.01821113 0.02195978 0.02221489 0.02192664
|
|
0.02198982 0.02070355 0.0226922 0.02168036]
|
|
|
|
mean value: 0.021817398071289063
|
|
|
|
key: test_mcc
|
|
value: [0.40482045 0.55555556 0.54754393 0.5104265 0.02622965 0.54248366
|
|
0.37955656 0.31354672 0.20327978 0.54248366]
|
|
|
|
mean value: 0.40259264696226943
|
|
|
|
key: train_mcc
|
|
value: [0.81019149 0.81686482 0.80479562 0.81705278 0.84865252 0.80442658
|
|
0.81705278 0.79837556 0.79221096 0.8296712 ]
|
|
|
|
mean value: 0.8139294318855579
|
|
|
|
key: test_accuracy
|
|
value: [0.69444444 0.77777778 0.77142857 0.74285714 0.51428571 0.77142857
|
|
0.68571429 0.65714286 0.6 0.77142857]
|
|
|
|
mean value: 0.6986507936507936
|
|
|
|
key: train_accuracy
|
|
value: [0.90506329 0.90822785 0.9022082 0.90851735 0.92429022 0.9022082
|
|
0.90851735 0.89905363 0.89589905 0.9148265 ]
|
|
|
|
mean value: 0.9068811643972368
|
|
|
|
key: test_fscore
|
|
value: [0.73170732 0.77777778 0.77777778 0.76923077 0.48484848 0.76470588
|
|
0.66666667 0.68421053 0.58823529 0.77777778]
|
|
|
|
mean value: 0.7022938273938802
|
|
|
|
key: train_fscore
|
|
value: [0.9044586 0.90965732 0.90402477 0.90851735 0.92405063 0.90282132
|
|
0.90851735 0.9 0.89719626 0.9148265 ]
|
|
|
|
mean value: 0.9074070097346472
|
|
|
|
key: test_precision
|
|
value: [0.65217391 0.77777778 0.73684211 0.68181818 0.5 0.76470588
|
|
0.73333333 0.65 0.625 0.77777778]
|
|
|
|
mean value: 0.6899428971366648
|
|
|
|
key: train_precision
|
|
value: [0.91025641 0.89570552 0.8902439 0.91139241 0.92993631 0.9
|
|
0.90566038 0.88888889 0.88343558 0.91194969]
|
|
|
|
mean value: 0.9027469079567659
|
|
|
|
key: test_recall
|
|
value: [0.83333333 0.77777778 0.82352941 0.88235294 0.47058824 0.76470588
|
|
0.61111111 0.72222222 0.55555556 0.77777778]
|
|
|
|
mean value: 0.7218954248366013
|
|
|
|
key: train_recall
|
|
value: [0.89873418 0.92405063 0.91823899 0.90566038 0.91823899 0.90566038
|
|
0.91139241 0.91139241 0.91139241 0.91772152]
|
|
|
|
mean value: 0.9122482286442162
|
|
|
|
key: test_roc_auc
|
|
value: [0.69444444 0.77777778 0.77287582 0.74673203 0.5130719 0.77124183
|
|
0.6879085 0.65522876 0.60130719 0.77124183]
|
|
|
|
mean value: 0.6991830065359477
|
|
|
|
key: train_roc_auc
|
|
value: [0.90506329 0.90822785 0.90215747 0.90852639 0.92430937 0.90219728
|
|
0.90852639 0.89909243 0.89594777 0.9148356 ]
|
|
|
|
mean value: 0.9068883846827482
|
|
|
|
key: test_jcc
|
|
value: [0.57692308 0.63636364 0.63636364 0.625 0.32 0.61904762
|
|
0.5 0.52 0.41666667 0.63636364]
|
|
|
|
mean value: 0.5486728271728272
|
|
|
|
key: train_jcc
|
|
value: [0.8255814 0.83428571 0.82485876 0.83236994 0.85882353 0.82285714
|
|
0.83236994 0.81818182 0.81355932 0.84302326]
|
|
|
|
mean value: 0.830591081938834
|
|
|
|
MCC on Blind test: 0.34
|
|
|
|
Accuracy on Blind test: 0.67
|
|
|
|
Model_name: Multinomial
|
|
Model func: MultinomialNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MultinomialNB())])
|
|
|
|
key: fit_time
|
|
value: [0.0136795 0.0100081 0.01195359 0.00942755 0.00932813 0.00924706
|
|
0.00931287 0.00954461 0.00923276 0.00932527]
|
|
|
|
mean value: 0.01010594367980957
|
|
|
|
key: score_time
|
|
value: [0.00931573 0.00924492 0.00878453 0.00859833 0.00871873 0.008672
|
|
0.00852799 0.00857353 0.00871491 0.00868726]
|
|
|
|
mean value: 0.008783793449401856
|
|
|
|
key: test_mcc
|
|
value: [0.2236068 0.5007734 0.57177187 0.62873728 0.08112739 0.54754393
|
|
0.60000322 0.25573908 0.4869281 0.34381054]
|
|
|
|
mean value: 0.4240041618135283
|
|
|
|
key: train_mcc
|
|
value: [0.45265187 0.43386975 0.408835 0.40836591 0.45654605 0.44214672
|
|
0.41583903 0.44859316 0.43587821 0.45329575]
|
|
|
|
mean value: 0.4356021447618876
|
|
|
|
key: test_accuracy
|
|
value: [0.61111111 0.75 0.74285714 0.8 0.54285714 0.77142857
|
|
0.8 0.62857143 0.74285714 0.65714286]
|
|
|
|
mean value: 0.7046825396825397
|
|
|
|
key: train_accuracy
|
|
value: [0.72468354 0.71518987 0.70347003 0.70347003 0.72555205 0.7192429
|
|
0.70662461 0.72239748 0.71608833 0.72555205]
|
|
|
|
mean value: 0.7162270894062213
|
|
|
|
key: test_fscore
|
|
value: [0.63157895 0.75675676 0.79069767 0.82051282 0.46666667 0.77777778
|
|
0.81081081 0.64864865 0.74285714 0.72727273]
|
|
|
|
mean value: 0.7173579973090377
|
|
|
|
key: train_fscore
|
|
value: [0.74029851 0.73214286 0.71856287 0.71686747 0.74635569 0.73746313
|
|
0.72072072 0.73809524 0.73214286 0.73716012]
|
|
|
|
mean value: 0.731980945751615
|
|
|
|
key: test_precision
|
|
value: [0.6 0.73684211 0.65384615 0.72727273 0.53846154 0.73684211
|
|
0.78947368 0.63157895 0.76470588 0.61538462]
|
|
|
|
mean value: 0.6794407759423239
|
|
|
|
key: train_precision
|
|
value: [0.70056497 0.69101124 0.68571429 0.68786127 0.69565217 0.69444444
|
|
0.68571429 0.69662921 0.69101124 0.70520231]
|
|
|
|
mean value: 0.6933805430745759
|
|
|
|
key: test_recall
|
|
value: [0.66666667 0.77777778 1. 0.94117647 0.41176471 0.82352941
|
|
0.83333333 0.66666667 0.72222222 0.88888889]
|
|
|
|
mean value: 0.773202614379085
|
|
|
|
key: train_recall
|
|
value: [0.78481013 0.77848101 0.75471698 0.74842767 0.80503145 0.78616352
|
|
0.75949367 0.78481013 0.77848101 0.7721519 ]
|
|
|
|
mean value: 0.7752567470742775
|
|
|
|
key: test_roc_auc
|
|
value: [0.61111111 0.75 0.75 0.80392157 0.53921569 0.77287582
|
|
0.79901961 0.62745098 0.74346405 0.6503268 ]
|
|
|
|
mean value: 0.7047385620915033
|
|
|
|
key: train_roc_auc
|
|
value: [0.72468354 0.71518987 0.70330786 0.70332776 0.72530053 0.71903113
|
|
0.70679086 0.72259374 0.71628453 0.72569859]
|
|
|
|
mean value: 0.7162208422896267
|
|
|
|
key: test_jcc
|
|
value: [0.46153846 0.60869565 0.65384615 0.69565217 0.30434783 0.63636364
|
|
0.68181818 0.48 0.59090909 0.57142857]
|
|
|
|
mean value: 0.5684599748078009
|
|
|
|
key: train_jcc
|
|
value: [0.58767773 0.57746479 0.56074766 0.55868545 0.59534884 0.58411215
|
|
0.56338028 0.58490566 0.57746479 0.58373206]
|
|
|
|
mean value: 0.5773519398369844
|
|
|
|
MCC on Blind test: 0.14
|
|
|
|
Accuracy on Blind test: 0.58
|
|
|
|
Model_name: Passive Aggresive
|
|
Model func: PassiveAggressiveClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
PassiveAggressiveClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.0140748 0.01407027 0.01748443 0.01679134 0.01666498 0.01806664
|
|
0.01892018 0.01885033 0.02053499 0.01682544]
|
|
|
|
mean value: 0.017228341102600096
|
|
|
|
key: score_time
|
|
value: [0.00899243 0.01114273 0.011374 0.01192832 0.01193833 0.01202989
|
|
0.01195478 0.01200414 0.01204705 0.01194572]
|
|
|
|
mean value: 0.011535739898681641
|
|
|
|
key: test_mcc
|
|
value: [0.39440532 0.61977979 0.57177187 0.65792885 0.235008 0.5815291
|
|
0.37049379 0.31354672 0.36962466 0.54248366]
|
|
|
|
mean value: 0.4656571762353827
|
|
|
|
key: train_mcc
|
|
value: [0.75756235 0.67604194 0.6597786 0.5837075 0.52913772 0.68303764
|
|
0.70672566 0.74770113 0.56953939 0.64846186]
|
|
|
|
mean value: 0.6561693780962766
|
|
|
|
key: test_accuracy
|
|
value: [0.69444444 0.80555556 0.74285714 0.8 0.6 0.77142857
|
|
0.68571429 0.65714286 0.62857143 0.77142857]
|
|
|
|
mean value: 0.7157142857142857
|
|
|
|
key: train_accuracy
|
|
value: [0.87658228 0.83227848 0.81388013 0.76025237 0.7192429 0.83911672
|
|
0.85173502 0.87381703 0.74763407 0.82334385]
|
|
|
|
mean value: 0.8137882841512598
|
|
|
|
key: test_fscore
|
|
value: [0.66666667 0.82051282 0.79069767 0.82926829 0.66666667 0.8
|
|
0.7027027 0.68421053 0.73469388 0.77777778]
|
|
|
|
mean value: 0.7473197005294976
|
|
|
|
key: train_fscore
|
|
value: [0.86956522 0.84637681 0.83923706 0.80512821 0.78132678 0.84866469
|
|
0.85800604 0.87421384 0.79695431 0.81578947]
|
|
|
|
mean value: 0.8335262428267585
|
|
|
|
key: test_precision
|
|
value: [0.73333333 0.76190476 0.65384615 0.70833333 0.56 0.69565217
|
|
0.68421053 0.65 0.58064516 0.77777778]
|
|
|
|
mean value: 0.6805703221714516
|
|
|
|
key: train_precision
|
|
value: [0.92198582 0.78074866 0.74038462 0.67965368 0.64112903 0.80337079
|
|
0.82080925 0.86875 0.66525424 0.84931507]
|
|
|
|
mean value: 0.7771401146853855
|
|
|
|
key: test_recall
|
|
value: [0.61111111 0.88888889 1. 1. 0.82352941 0.94117647
|
|
0.72222222 0.72222222 1. 0.77777778]
|
|
|
|
mean value: 0.8486928104575163
|
|
|
|
key: train_recall
|
|
value: [0.82278481 0.92405063 0.96855346 0.98742138 1. 0.89937107
|
|
0.89873418 0.87974684 0.99367089 0.78481013]
|
|
|
|
mean value: 0.9159143380304116
|
|
|
|
key: test_roc_auc
|
|
value: [0.69444444 0.80555556 0.75 0.80555556 0.60620915 0.77614379
|
|
0.68464052 0.65522876 0.61764706 0.77124183]
|
|
|
|
mean value: 0.7166666666666667
|
|
|
|
key: train_roc_auc
|
|
value: [0.87658228 0.83227848 0.81339065 0.75953348 0.71835443 0.83892604
|
|
0.85188281 0.87383568 0.74840777 0.82322267]
|
|
|
|
mean value: 0.8136414298224663
|
|
|
|
key: test_jcc
|
|
value: [0.5 0.69565217 0.65384615 0.70833333 0.5 0.66666667
|
|
0.54166667 0.52 0.58064516 0.63636364]
|
|
|
|
mean value: 0.6003173792079823
|
|
|
|
key: train_jcc
|
|
value: [0.76923077 0.73366834 0.72300469 0.67381974 0.64112903 0.7371134
|
|
0.75132275 0.77653631 0.66244726 0.68888889]
|
|
|
|
mean value: 0.7157161193028951
|
|
|
|
MCC on Blind test: 0.35
|
|
|
|
Accuracy on Blind test: 0.64
|
|
|
|
Model_name: Stochastic GDescent
|
|
Model func: SGDClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SGDClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01913738 0.0192678 0.0184958 0.02039719 0.01903605 0.01772308
|
|
0.01957345 0.0205121 0.01728296 0.01713586]
|
|
|
|
mean value: 0.018856167793273926
|
|
|
|
key: score_time
|
|
value: [0.00974441 0.01202059 0.01236725 0.01226926 0.01234198 0.01199961
|
|
0.01243424 0.01198888 0.01200747 0.01197147]
|
|
|
|
mean value: 0.011914515495300293
|
|
|
|
key: test_mcc
|
|
value: [0.40089186 0.66332496 0.46804587 0.51449576 0.19943817 0.70196412
|
|
0.67680204 0.14379085 0.31506302 0.31636434]
|
|
|
|
mean value: 0.4400180983626669
|
|
|
|
key: train_mcc
|
|
value: [0.47913671 0.66720064 0.54010335 0.49645901 0.77309659 0.59641779
|
|
0.66187375 0.73642815 0.31308494 0.56463751]
|
|
|
|
mean value: 0.5828438444215096
|
|
|
|
key: test_accuracy
|
|
value: [0.66666667 0.80555556 0.68571429 0.71428571 0.6 0.82857143
|
|
0.82857143 0.57142857 0.6 0.62857143]
|
|
|
|
mean value: 0.692936507936508
|
|
|
|
key: train_accuracy
|
|
value: [0.68670886 0.8164557 0.72870662 0.69716088 0.88643533 0.77917981
|
|
0.82334385 0.86750789 0.59305994 0.74132492]
|
|
|
|
mean value: 0.7619883799864233
|
|
|
|
key: test_fscore
|
|
value: [0.73913043 0.8372093 0.52173913 0.58333333 0.53333333 0.85
|
|
0.8125 0.57142857 0.72 0.72340426]
|
|
|
|
mean value: 0.689207836095736
|
|
|
|
key: train_fscore
|
|
value: [0.76144578 0.84153005 0.63247863 0.56756757 0.88819876 0.81283422
|
|
0.80141844 0.8627451 0.70880361 0.79396985]
|
|
|
|
mean value: 0.7670992018926353
|
|
|
|
key: test_precision
|
|
value: [0.60714286 0.72 1. 1. 0.61538462 0.73913043
|
|
0.92857143 0.58823529 0.5625 0.5862069 ]
|
|
|
|
mean value: 0.7347171526550881
|
|
|
|
key: train_precision
|
|
value: [0.61478599 0.74038462 0.98666667 1. 0.87730061 0.70697674
|
|
0.91129032 0.89189189 0.55087719 0.65833333]
|
|
|
|
mean value: 0.7938507372740486
|
|
|
|
key: test_recall
|
|
value: [0.94444444 1. 0.35294118 0.41176471 0.47058824 1.
|
|
0.72222222 0.55555556 1. 0.94444444]
|
|
|
|
mean value: 0.7401960784313726
|
|
|
|
key: train_recall
|
|
value: [1. 0.97468354 0.46540881 0.39622642 0.89937107 0.95597484
|
|
0.71518987 0.83544304 0.99367089 1. ]
|
|
|
|
mean value: 0.8235968473847624
|
|
|
|
key: test_roc_auc
|
|
value: [0.66666667 0.80555556 0.67647059 0.70588235 0.59640523 0.83333333
|
|
0.83169935 0.57189542 0.58823529 0.61928105]
|
|
|
|
mean value: 0.6895424836601307
|
|
|
|
key: train_roc_auc
|
|
value: [0.68670886 0.8164557 0.72953985 0.69811321 0.8863944 0.77862033
|
|
0.82300374 0.86740705 0.59431972 0.74213836]
|
|
|
|
mean value: 0.7622701218055887
|
|
|
|
key: test_jcc
|
|
value: [0.5862069 0.72 0.35294118 0.41176471 0.36363636 0.73913043
|
|
0.68421053 0.4 0.5625 0.56666667]
|
|
|
|
mean value: 0.5387056770306093
|
|
|
|
key: train_jcc
|
|
value: [0.61478599 0.72641509 0.4625 0.39622642 0.79888268 0.68468468
|
|
0.66863905 0.75862069 0.54895105 0.65833333]
|
|
|
|
mean value: 0.6318038993094784
|
|
|
|
MCC on Blind test: 0.59
|
|
|
|
Accuracy on Blind test: 0.79
|
|
|
|
Model_name: AdaBoost Classifier
|
|
Model func: AdaBoostClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', AdaBoostClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.15341592 0.14559603 0.14337611 0.14296579 0.14716601 0.14489007
|
|
0.14460921 0.14914179 0.14565468 0.14958763]
|
|
|
|
mean value: 0.14664032459259033
|
|
|
|
key: score_time
|
|
value: [0.01507354 0.01512289 0.01575899 0.015625 0.01577687 0.01592374
|
|
0.01520467 0.01659775 0.01633358 0.01628613]
|
|
|
|
mean value: 0.015770316123962402
|
|
|
|
key: test_mcc
|
|
value: [0.4472136 0.61977979 0.66009836 0.83006536 0.25671802 0.54754393
|
|
0.66229864 0.31372549 0.49507377 0.60130719]
|
|
|
|
mean value: 0.5433824138767602
|
|
|
|
key: train_mcc
|
|
value: [0.95571534 0.94305686 0.94952631 0.94959991 0.98109152 0.94959991
|
|
0.943237 0.96862933 0.9498328 0.97484177]
|
|
|
|
mean value: 0.9565130764088194
|
|
|
|
key: test_accuracy
|
|
value: [0.72222222 0.80555556 0.82857143 0.91428571 0.62857143 0.77142857
|
|
0.82857143 0.65714286 0.74285714 0.8 ]
|
|
|
|
mean value: 0.7699206349206349
|
|
|
|
key: train_accuracy
|
|
value: [0.9778481 0.97151899 0.97476341 0.97476341 0.99053628 0.97476341
|
|
0.97160883 0.98422713 0.97476341 0.9873817 ]
|
|
|
|
mean value: 0.9782174659585513
|
|
|
|
key: test_fscore
|
|
value: [0.73684211 0.78787879 0.8125 0.91428571 0.58064516 0.77777778
|
|
0.82352941 0.66666667 0.72727273 0.8 ]
|
|
|
|
mean value: 0.762739835219986
|
|
|
|
key: train_fscore
|
|
value: [0.97791798 0.97142857 0.97484277 0.975 0.99059561 0.975
|
|
0.97160883 0.98432602 0.975 0.98742138]
|
|
|
|
mean value: 0.9783141166346138
|
|
|
|
key: test_precision
|
|
value: [0.7 0.86666667 0.86666667 0.88888889 0.64285714 0.73684211
|
|
0.875 0.66666667 0.8 0.82352941]
|
|
|
|
mean value: 0.7867117548773895
|
|
|
|
key: train_precision
|
|
value: [0.97484277 0.97452229 0.97484277 0.9689441 0.9875 0.9689441
|
|
0.96855346 0.97515528 0.96296296 0.98125 ]
|
|
|
|
mean value: 0.9737517727928156
|
|
|
|
key: test_recall
|
|
value: [0.77777778 0.72222222 0.76470588 0.94117647 0.52941176 0.82352941
|
|
0.77777778 0.66666667 0.66666667 0.77777778]
|
|
|
|
mean value: 0.7447712418300654
|
|
|
|
key: train_recall
|
|
value: [0.98101266 0.96835443 0.97484277 0.98113208 0.99371069 0.98113208
|
|
0.97468354 0.99367089 0.98734177 0.99367089]
|
|
|
|
mean value: 0.9829551787278084
|
|
|
|
key: test_roc_auc
|
|
value: [0.72222222 0.80555556 0.82679739 0.91503268 0.62581699 0.77287582
|
|
0.83006536 0.65686275 0.74509804 0.80065359]
|
|
|
|
mean value: 0.7700980392156862
|
|
|
|
key: train_roc_auc
|
|
value: [0.9778481 0.97151899 0.97476316 0.97474325 0.99052623 0.97474325
|
|
0.9716185 0.98425683 0.97480296 0.98740148]
|
|
|
|
mean value: 0.9782222752965528
|
|
|
|
key: test_jcc
|
|
value: [0.58333333 0.65 0.68421053 0.84210526 0.40909091 0.63636364
|
|
0.7 0.5 0.57142857 0.66666667]
|
|
|
|
mean value: 0.62431989063568
|
|
|
|
key: train_jcc
|
|
value: [0.95679012 0.94444444 0.95092025 0.95121951 0.98136646 0.95121951
|
|
0.94478528 0.9691358 0.95121951 0.97515528]
|
|
|
|
mean value: 0.9576256167558563
|
|
|
|
MCC on Blind test: 0.64
|
|
|
|
Accuracy on Blind test: 0.82
|
|
|
|
Model_name: Bagging Classifier
|
|
Model func: BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
BaggingClassifier(n_jobs=10, oob_score=True,
|
|
random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.05692339 0.08574867 0.07924795 0.07966113 0.06598711 0.07994103
|
|
0.08200693 0.07545042 0.05371451 0.06896377]
|
|
|
|
mean value: 0.07276449203491211
|
|
|
|
key: score_time
|
|
value: [0.02605581 0.02339387 0.0397501 0.0291419 0.02865005 0.03141642
|
|
0.01997232 0.02368736 0.01852202 0.02502227]
|
|
|
|
mean value: 0.026561212539672852
|
|
|
|
key: test_mcc
|
|
value: [0.61205637 0.68376346 0.60678804 0.42906394 0.42810458 0.60130719
|
|
0.67680204 0.37340802 0.42810458 0.66229864]
|
|
|
|
mean value: 0.5501696847597503
|
|
|
|
key: train_mcc
|
|
value: [0.94967147 0.96202532 0.98109227 0.96245424 0.96893923 0.97507568
|
|
0.93720141 0.97507176 0.9628305 0.95630718]
|
|
|
|
mean value: 0.9630669051105084
|
|
|
|
key: test_accuracy
|
|
value: [0.80555556 0.83333333 0.8 0.71428571 0.71428571 0.8
|
|
0.82857143 0.68571429 0.71428571 0.82857143]
|
|
|
|
mean value: 0.7724603174603175
|
|
|
|
key: train_accuracy
|
|
value: [0.97468354 0.98101266 0.99053628 0.98107256 0.98422713 0.9873817
|
|
0.96845426 0.9873817 0.98107256 0.97791798]
|
|
|
|
mean value: 0.98137403665695
|
|
|
|
key: test_fscore
|
|
value: [0.81081081 0.8125 0.77419355 0.6875 0.70588235 0.8
|
|
0.8125 0.71794872 0.72222222 0.82352941]
|
|
|
|
mean value: 0.766708706407473
|
|
|
|
key: train_fscore
|
|
value: [0.97435897 0.98101266 0.99053628 0.98089172 0.98402556 0.98726115
|
|
0.96794872 0.98717949 0.98064516 0.97749196]
|
|
|
|
mean value: 0.9811351663370135
|
|
|
|
key: test_precision
|
|
value: [0.78947368 0.92857143 0.85714286 0.73333333 0.70588235 0.77777778
|
|
0.92857143 0.66666667 0.72222222 0.875 ]
|
|
|
|
mean value: 0.7984641751437417
|
|
|
|
key: train_precision
|
|
value: [0.98701299 0.98101266 0.99367089 0.99354839 1. 1.
|
|
0.98051948 1. 1. 0.99346405]
|
|
|
|
mean value: 0.9929228451220621
|
|
|
|
key: test_recall
|
|
value: [0.83333333 0.72222222 0.70588235 0.64705882 0.70588235 0.82352941
|
|
0.72222222 0.77777778 0.72222222 0.77777778]
|
|
|
|
mean value: 0.7437908496732026
|
|
|
|
key: train_recall
|
|
value: [0.96202532 0.98101266 0.98742138 0.96855346 0.96855346 0.97484277
|
|
0.9556962 0.97468354 0.96202532 0.96202532]
|
|
|
|
mean value: 0.969683942361277
|
|
|
|
key: test_roc_auc
|
|
value: [0.80555556 0.83333333 0.79738562 0.7124183 0.71405229 0.80065359
|
|
0.83169935 0.68300654 0.71405229 0.83006536]
|
|
|
|
mean value: 0.7722222222222223
|
|
|
|
key: train_roc_auc
|
|
value: [0.97468354 0.98101266 0.99054613 0.98111217 0.98427673 0.98742138
|
|
0.96841414 0.98734177 0.98101266 0.977868 ]
|
|
|
|
mean value: 0.9813689196720006
|
|
|
|
key: test_jcc
|
|
value: [0.68181818 0.68421053 0.63157895 0.52380952 0.54545455 0.66666667
|
|
0.68421053 0.56 0.56521739 0.7 ]
|
|
|
|
mean value: 0.6242966309053265
|
|
|
|
key: train_jcc
|
|
value: [0.95 0.96273292 0.98125 0.9625 0.96855346 0.97484277
|
|
0.9378882 0.97468354 0.96202532 0.95597484]
|
|
|
|
mean value: 0.9630451047954306
|
|
|
|
MCC on Blind test: 0.44
|
|
|
|
Accuracy on Blind test: 0.7
|
|
|
|
Model_name: Gaussian Process
|
|
Model func: GaussianProcessClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianProcessClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.0508275 0.10509944 0.12613726 0.1361196 0.14902663 0.13211441
|
|
0.193712 0.17184043 0.16689801 0.1765039 ]
|
|
|
|
mean value: 0.14082791805267333
|
|
|
|
key: score_time
|
|
value: [0.02170348 0.02615643 0.02238178 0.02268672 0.04280734 0.02516341
|
|
0.04326391 0.04236627 0.03333473 0.03141975]
|
|
|
|
mean value: 0.03112838268280029
|
|
|
|
key: test_mcc
|
|
value: [0.23570226 0.55901699 0.21004201 0.48809353 0.19943817 0.43278921
|
|
0.54248366 0.25671802 0.08852507 0.43605973]
|
|
|
|
mean value: 0.34488686499781873
|
|
|
|
key: train_mcc
|
|
value: [0.97476164 0.98103231 0.97484177 0.98738158 0.98738158 0.98738158
|
|
0.98109152 0.98109152 0.98738158 0.99371044]
|
|
|
|
mean value: 0.983605550432851
|
|
|
|
key: test_accuracy
|
|
value: [0.61111111 0.77777778 0.6 0.71428571 0.6 0.71428571
|
|
0.77142857 0.62857143 0.54285714 0.71428571]
|
|
|
|
mean value: 0.6674603174603174
|
|
|
|
key: train_accuracy
|
|
value: [0.98734177 0.99050633 0.9873817 0.99369085 0.99369085 0.99369085
|
|
0.99053628 0.99053628 0.99369085 0.99684543]
|
|
|
|
mean value: 0.9917911192748473
|
|
|
|
key: test_fscore
|
|
value: [0.53333333 0.78947368 0.63157895 0.76190476 0.53333333 0.72222222
|
|
0.77777778 0.66666667 0.52941176 0.75 ]
|
|
|
|
mean value: 0.6695702491522925
|
|
|
|
key: train_fscore
|
|
value: [0.98726115 0.99047619 0.98734177 0.99371069 0.99371069 0.99371069
|
|
0.99047619 0.99047619 0.99367089 0.9968254 ]
|
|
|
|
mean value: 0.991765984845033
|
|
|
|
key: test_precision
|
|
value: [0.66666667 0.75 0.57142857 0.64 0.61538462 0.68421053
|
|
0.77777778 0.61904762 0.5625 0.68181818]
|
|
|
|
mean value: 0.6568833958439222
|
|
|
|
key: train_precision
|
|
value: [0.99358974 0.99363057 0.99363057 0.99371069 0.99371069 0.99371069
|
|
0.99363057 0.99363057 0.99367089 1. ]
|
|
|
|
mean value: 0.9942914998131022
|
|
|
|
key: test_recall
|
|
value: [0.44444444 0.83333333 0.70588235 0.94117647 0.47058824 0.76470588
|
|
0.77777778 0.72222222 0.5 0.83333333]
|
|
|
|
mean value: 0.6993464052287581
|
|
|
|
key: train_recall
|
|
value: [0.98101266 0.98734177 0.98113208 0.99371069 0.99371069 0.99371069
|
|
0.98734177 0.98734177 0.99367089 0.99367089]
|
|
|
|
mean value: 0.9892643897778839
|
|
|
|
key: test_roc_auc
|
|
value: [0.61111111 0.77777778 0.60294118 0.72058824 0.59640523 0.71568627
|
|
0.77124183 0.62581699 0.54411765 0.71078431]
|
|
|
|
mean value: 0.6676470588235295
|
|
|
|
key: train_roc_auc
|
|
value: [0.98734177 0.99050633 0.98740148 0.99369079 0.99369079 0.99369079
|
|
0.99052623 0.99052623 0.99369079 0.99683544]
|
|
|
|
mean value: 0.9917900644853117
|
|
|
|
key: test_jcc
|
|
value: [0.36363636 0.65217391 0.46153846 0.61538462 0.36363636 0.56521739
|
|
0.63636364 0.5 0.36 0.6 ]
|
|
|
|
mean value: 0.5117950744907267
|
|
|
|
key: train_jcc
|
|
value: [0.97484277 0.98113208 0.975 0.9875 0.9875 0.9875
|
|
0.98113208 0.98113208 0.98742138 0.99367089]
|
|
|
|
mean value: 0.983683126343444
|
|
|
|
MCC on Blind test: 0.15
|
|
|
|
Accuracy on Blind test: 0.58
|
|
|
|
Model_name: Gradient Boosting
|
|
Model func: GradientBoostingClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GradientBoostingClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.94957113 1.08033276 1.35335708 1.35058165 1.33091569 0.87500858
|
|
0.72606444 0.74691105 0.69290042 0.73771286]
|
|
|
|
mean value: 0.9843355655670166
|
|
|
|
key: score_time
|
|
value: [0.02747321 0.03321004 0.02398467 0.02301693 0.02378416 0.01277089
|
|
0.01287675 0.01277661 0.01323342 0.01306009]
|
|
|
|
mean value: 0.019618678092956542
|
|
|
|
key: test_mcc
|
|
value: [0.55901699 0.78262379 0.77561558 0.60678804 0.4869281 0.61059098
|
|
0.56011203 0.48524851 0.49507377 0.66229864]
|
|
|
|
mean value: 0.6024296444327484
|
|
|
|
key: train_mcc
|
|
value: [0.99369079 1. 1. 1. 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9993690788750604
|
|
|
|
key: test_accuracy
|
|
value: [0.77777778 0.88888889 0.88571429 0.8 0.74285714 0.8
|
|
0.77142857 0.74285714 0.74285714 0.82857143]
|
|
|
|
mean value: 0.7980952380952381
|
|
|
|
key: train_accuracy
|
|
value: [0.99683544 1. 1. 1. 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9996835443037975
|
|
|
|
key: test_fscore
|
|
value: [0.78947368 0.88235294 0.875 0.77419355 0.74285714 0.81081081
|
|
0.75 0.75675676 0.72727273 0.82352941]
|
|
|
|
mean value: 0.7932247023236236
|
|
|
|
key: train_fscore
|
|
value: [0.99684543 1. 1. 1. 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9996845425867508
|
|
|
|
key: test_precision
|
|
value: [0.75 0.9375 0.93333333 0.85714286 0.72222222 0.75
|
|
0.85714286 0.73684211 0.8 0.875 ]
|
|
|
|
mean value: 0.8219183375104427
|
|
|
|
key: train_precision
|
|
value: [0.99371069 1. 1. 1. 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9993710691823899
|
|
|
|
key: test_recall
|
|
value: [0.83333333 0.83333333 0.82352941 0.70588235 0.76470588 0.88235294
|
|
0.66666667 0.77777778 0.66666667 0.77777778]
|
|
|
|
mean value: 0.773202614379085
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.77777778 0.88888889 0.88398693 0.79738562 0.74346405 0.80228758
|
|
0.7745098 0.74183007 0.74509804 0.83006536]
|
|
|
|
mean value: 0.7985294117647058
|
|
|
|
key: train_roc_auc
|
|
value: [0.99683544 1. 1. 1. 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9996835443037975
|
|
|
|
key: test_jcc
|
|
value: [0.65217391 0.78947368 0.77777778 0.63157895 0.59090909 0.68181818
|
|
0.6 0.60869565 0.57142857 0.7 ]
|
|
|
|
mean value: 0.660385581872996
|
|
|
|
key: train_jcc
|
|
value: [0.99371069 1. 1. 1. 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9993710691823899
|
|
|
|
MCC on Blind test: 0.64
|
|
|
|
Accuracy on Blind test: 0.82
|
|
|
|
Model_name: QDA
|
|
Model func: QuadraticDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', QuadraticDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.10395193 0.06345844 0.06561041 0.06429601 0.07124066 0.10163808
|
|
0.06904268 0.06744313 0.0675621 0.0681169 ]
|
|
|
|
mean value: 0.07423603534698486
|
|
|
|
key: score_time
|
|
value: [0.01719022 0.0237124 0.02569318 0.02966261 0.02562976 0.02059865
|
|
0.03447008 0.03131509 0.02619195 0.03064704]
|
|
|
|
mean value: 0.026511096954345705
|
|
|
|
key: test_mcc
|
|
value: [0.3354102 0.52048344 0.08496732 0.46109408 0.19934641 0.34299717
|
|
0.36155076 0.31354672 0.14002801 0.19943817]
|
|
|
|
mean value: 0.2958862272765599
|
|
|
|
key: train_mcc
|
|
value: [0.66201856 0.86922699 0.80344002 0.70968872 0.71101985 0.68098909
|
|
0.86956126 0.76778044 0.76244394 0.92021604]
|
|
|
|
mean value: 0.7756384901709313
|
|
|
|
key: test_accuracy
|
|
value: [0.66666667 0.75 0.54285714 0.71428571 0.6 0.62857143
|
|
0.65714286 0.65714286 0.57142857 0.6 ]
|
|
|
|
mean value: 0.6388095238095238
|
|
|
|
key: train_accuracy
|
|
value: [0.81329114 0.93037975 0.89589905 0.84227129 0.83596215 0.8170347
|
|
0.93059937 0.88012618 0.86750789 0.95899054]
|
|
|
|
mean value: 0.8772062053268378
|
|
|
|
key: test_fscore
|
|
value: [0.64705882 0.7804878 0.52941176 0.75 0.58823529 0.71111111
|
|
0.57142857 0.68421053 0.61538462 0.65 ]
|
|
|
|
mean value: 0.6527328511471078
|
|
|
|
key: train_fscore
|
|
value: [0.83923706 0.93491124 0.90434783 0.86111111 0.85945946 0.84574468
|
|
0.92517007 0.88757396 0.88268156 0.96024465]
|
|
|
|
mean value: 0.8900481622420955
|
|
|
|
key: test_precision
|
|
value: [0.6875 0.69565217 0.52941176 0.65217391 0.58823529 0.57142857
|
|
0.8 0.65 0.57142857 0.59090909]
|
|
|
|
mean value: 0.6336739379546285
|
|
|
|
key: train_precision
|
|
value: [0.73684211 0.87777778 0.83870968 0.77114428 0.7535545 0.73271889
|
|
1. 0.83333333 0.79 0.92899408]
|
|
|
|
mean value: 0.8263074651619711
|
|
|
|
key: test_recall
|
|
value: [0.61111111 0.88888889 0.52941176 0.88235294 0.58823529 0.94117647
|
|
0.44444444 0.72222222 0.66666667 0.72222222]
|
|
|
|
mean value: 0.699673202614379
|
|
|
|
key: train_recall
|
|
value: [0.97468354 1. 0.98113208 0.97484277 1. 1.
|
|
0.86075949 0.94936709 1. 0.99367089]
|
|
|
|
mean value: 0.9734455855425523
|
|
|
|
key: test_roc_auc
|
|
value: [0.66666667 0.75 0.54248366 0.71895425 0.5996732 0.6372549
|
|
0.66339869 0.65522876 0.56862745 0.59640523]
|
|
|
|
mean value: 0.6398692810457517
|
|
|
|
key: train_roc_auc
|
|
value: [0.81329114 0.93037975 0.89562933 0.84185176 0.83544304 0.8164557
|
|
0.93037975 0.88034392 0.86792453 0.95909959]
|
|
|
|
mean value: 0.8770798503303877
|
|
|
|
key: test_jcc
|
|
value: [0.47826087 0.64 0.36 0.6 0.41666667 0.55172414
|
|
0.4 0.52 0.44444444 0.48148148]
|
|
|
|
mean value: 0.48925776000888443
|
|
|
|
key: train_jcc
|
|
value: [0.72300469 0.87777778 0.82539683 0.75609756 0.7535545 0.73271889
|
|
0.86075949 0.79787234 0.79 0.92352941]
|
|
|
|
mean value: 0.8040711501225902
|
|
|
|
MCC on Blind test: 0.14
|
|
|
|
Accuracy on Blind test: 0.58
|
|
|
|
Model_name: Ridge Classifier
|
|
Model func: RidgeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.04062843 0.0383327 0.04687071 0.03227305 0.05511498 0.07243466
|
|
0.05638766 0.04591179 0.05584884 0.05544066]
|
|
|
|
mean value: 0.04992434978485107
|
|
|
|
key: score_time
|
|
value: [0.02106357 0.02162671 0.03051853 0.05224371 0.03813624 0.03405857
|
|
0.03598046 0.02535057 0.03507161 0.03812766]
|
|
|
|
mean value: 0.033217763900756835
|
|
|
|
key: test_mcc
|
|
value: [0.39440532 0.61205637 0.60130719 0.66229864 0.19943817 0.61059098
|
|
0.42810458 0.19802951 0.54754393 0.54248366]
|
|
|
|
mean value: 0.47962583485985044
|
|
|
|
key: train_mcc
|
|
value: [0.77862138 0.75955453 0.74768104 0.74768104 0.79816076 0.72246328
|
|
0.75409053 0.7665698 0.74763156 0.7350822 ]
|
|
|
|
mean value: 0.7557536128515295
|
|
|
|
key: test_accuracy
|
|
value: [0.69444444 0.80555556 0.8 0.82857143 0.6 0.8
|
|
0.71428571 0.6 0.77142857 0.77142857]
|
|
|
|
mean value: 0.7385714285714285
|
|
|
|
key: train_accuracy
|
|
value: [0.88924051 0.87974684 0.87381703 0.87381703 0.89905363 0.86119874
|
|
0.87697161 0.88328076 0.87381703 0.86750789]
|
|
|
|
mean value: 0.8778451064169628
|
|
|
|
key: test_fscore
|
|
value: [0.71794872 0.8 0.8 0.83333333 0.53333333 0.81081081
|
|
0.72222222 0.63157895 0.76470588 0.77777778]
|
|
|
|
mean value: 0.7391711025147557
|
|
|
|
key: train_fscore
|
|
value: /home/tanu/git/LSHTM_analysis/scripts/ml/./pnca_cd_sl.py:176: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
rus_CT.sort_values(by = ['test_mcc'], ascending = False, inplace = True)
|
|
/home/tanu/git/LSHTM_analysis/scripts/ml/./pnca_cd_sl.py:179: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
rus_BT.sort_values(by = ['bts_mcc'], ascending = False, inplace = True)
|
|
[0.88817891 0.87898089 0.875 0.875 0.9 0.86075949
|
|
0.87774295 0.88253968 0.87341772 0.86792453]
|
|
|
|
mean value: 0.8779544178197671
|
|
|
|
key: test_precision
|
|
value: [0.66666667 0.82352941 0.77777778 0.78947368 0.61538462 0.75
|
|
0.72222222 0.6 0.8125 0.77777778]
|
|
|
|
mean value: 0.7335332155804292
|
|
|
|
key: train_precision
|
|
value: [0.89677419 0.88461538 0.86956522 0.86956522 0.89440994 0.86624204
|
|
0.86956522 0.88535032 0.87341772 0.8625 ]
|
|
|
|
mean value: 0.8772005246432769
|
|
|
|
key: test_recall
|
|
value: [0.77777778 0.77777778 0.82352941 0.88235294 0.47058824 0.88235294
|
|
0.72222222 0.66666667 0.72222222 0.77777778]
|
|
|
|
mean value: 0.7503267973856209
|
|
|
|
key: train_recall
|
|
value: [0.87974684 0.87341772 0.88050314 0.88050314 0.90566038 0.85534591
|
|
0.88607595 0.87974684 0.87341772 0.87341772]
|
|
|
|
mean value: 0.8787835363426479
|
|
|
|
key: test_roc_auc
|
|
value: [0.69444444 0.80555556 0.80065359 0.83006536 0.59640523 0.80228758
|
|
0.71405229 0.59803922 0.77287582 0.77124183]
|
|
|
|
mean value: 0.7385620915032679
|
|
|
|
key: train_roc_auc
|
|
value: [0.88924051 0.87974684 0.87379588 0.87379588 0.89903272 0.86121726
|
|
0.87700024 0.88326964 0.87381578 0.86752647]
|
|
|
|
mean value: 0.8778441206910278
|
|
|
|
key: test_jcc
|
|
value: [0.56 0.66666667 0.66666667 0.71428571 0.36363636 0.68181818
|
|
0.56521739 0.46153846 0.61904762 0.63636364]
|
|
|
|
mean value: 0.5935240701327658
|
|
|
|
key: train_jcc
|
|
value: [0.79885057 0.78409091 0.77777778 0.77777778 0.81818182 0.75555556
|
|
0.78212291 0.78977273 0.7752809 0.76666667]
|
|
|
|
mean value: 0.7826077610940213
|
|
|
|
MCC on Blind test: 0.34
|
|
|
|
Accuracy on Blind test: 0.67
|
|
|
|
Model_name: Ridge ClassifierCV
|
|
Model func: RidgeClassifierCV(cv=10)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifierCV(cv=10))])
|
|
|
|
key: fit_time
|
|
value: [0.33978271 0.42522049 0.4941678 0.48616838 0.46958041 0.43947339
|
|
0.49444461 0.43981433 0.48236465 0.43516088]
|
|
|
|
mean value: 0.45061776638031004
|
|
|
|
key: score_time
|
|
value: [0.04065251 0.03120565 0.05050755 0.03590274 0.04430676 0.03336024
|
|
0.03085423 0.04242754 0.03187752 0.04001975]
|
|
|
|
mean value: 0.03811144828796387
|
|
|
|
key: test_mcc
|
|
value: [0.4472136 0.77777778 0.61059098 0.66229864 0.19802951 0.61059098
|
|
0.54754393 0.19802951 0.66009836 0.65686275]
|
|
|
|
mean value: 0.536903603559525
|
|
|
|
key: train_mcc
|
|
value: [0.72175032 0.71531883 0.70348698 0.70977629 0.74133195 0.72239471
|
|
0.69715787 0.7665698 0.69715787 0.69740407]
|
|
|
|
mean value: 0.7172348695720924
|
|
|
|
key: test_accuracy
|
|
value: [0.72222222 0.88888889 0.8 0.82857143 0.6 0.8
|
|
0.77142857 0.6 0.82857143 0.82857143]
|
|
|
|
mean value: 0.7668253968253969
|
|
|
|
key: train_accuracy
|
|
value: [0.86075949 0.85759494 0.85173502 0.85488959 0.87066246 0.86119874
|
|
0.84858044 0.88328076 0.84858044 0.84858044]
|
|
|
|
mean value: 0.8585862316815078
|
|
|
|
key: test_fscore
|
|
value: [0.73684211 0.88888889 0.81081081 0.83333333 0.5625 0.81081081
|
|
0.76470588 0.63157895 0.84210526 0.83333333]
|
|
|
|
mean value: 0.7714909375319592
|
|
|
|
key: train_fscore
|
|
value: [0.85897436 0.85623003 0.85173502 0.85534591 0.87147335 0.86163522
|
|
0.84810127 0.88253968 0.84810127 0.85 ]
|
|
|
|
mean value: 0.858413610718881
|
|
|
|
key: test_precision
|
|
value: [0.7 0.88888889 0.75 0.78947368 0.6 0.75
|
|
0.8125 0.6 0.8 0.83333333]
|
|
|
|
mean value: 0.7524195906432749
|
|
|
|
key: train_precision
|
|
value: [0.87012987 0.86451613 0.85443038 0.85534591 0.86875 0.86163522
|
|
0.84810127 0.88535032 0.84810127 0.83950617]
|
|
|
|
mean value: 0.8595866533940849
|
|
|
|
key: test_recall
|
|
value: [0.77777778 0.88888889 0.88235294 0.88235294 0.52941176 0.88235294
|
|
0.72222222 0.66666667 0.88888889 0.83333333]
|
|
|
|
mean value: 0.7954248366013071
|
|
|
|
key: train_recall
|
|
value: [0.84810127 0.84810127 0.8490566 0.85534591 0.87421384 0.86163522
|
|
0.84810127 0.87974684 0.84810127 0.86075949]
|
|
|
|
mean value: 0.8573162964732107
|
|
|
|
key: test_roc_auc
|
|
value: [0.72222222 0.88888889 0.80228758 0.83006536 0.59803922 0.80228758
|
|
0.77287582 0.59803922 0.82679739 0.82843137]
|
|
|
|
mean value: 0.7669934640522875
|
|
|
|
key: train_roc_auc
|
|
value: [0.86075949 0.85759494 0.85174349 0.85488815 0.87065122 0.86119736
|
|
0.84857893 0.88326964 0.84857893 0.84861874]
|
|
|
|
mean value: 0.8585880901202134
|
|
|
|
key: test_jcc
|
|
value: [0.58333333 0.8 0.68181818 0.71428571 0.39130435 0.68181818
|
|
0.61904762 0.46153846 0.72727273 0.71428571]
|
|
|
|
mean value: 0.637470428122602
|
|
|
|
key: train_jcc
|
|
value: [0.75280899 0.74860335 0.74175824 0.74725275 0.77222222 0.75690608
|
|
0.73626374 0.78977273 0.73626374 0.73913043]
|
|
|
|
mean value: 0.7520982263883438
|
|
|
|
MCC on Blind test: 0.39
|
|
|
|
Accuracy on Blind test: 0.7
|
|
|
|
Model_name: Logistic Regression
|
|
Model func: LogisticRegression(random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegression(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.09176397 0.11980963 0.11011243 0.14975119 0.13808537 0.16043615
|
|
0.1992569 0.11476517 0.09389091 0.11426997]
|
|
|
|
mean value: 0.12921416759490967
|
|
|
|
key: score_time
|
|
value: [0.01855183 0.03585911 0.02424717 0.02689886 0.04189634 0.04716086
|
|
0.04669476 0.05341172 0.04221559 0.02758384]
|
|
|
|
mean value: 0.03645200729370117
|
|
|
|
key: test_mcc
|
|
value: [0.63123793 0.50454827 0.72077922 0.48917749 0.58134627 0.58824786
|
|
0.76789769 0.72451364 0.21040933 0.40088002]
|
|
|
|
mean value: 0.5619037712320794
|
|
|
|
key: train_mcc
|
|
value: [0.72610972 0.7158578 0.67961217 0.71059238 0.7002858 0.71059238
|
|
0.68500216 0.71062262 0.74164686 0.74226246]
|
|
|
|
mean value: 0.7122584348906272
|
|
|
|
key: test_accuracy
|
|
value: [0.81395349 0.74418605 0.86046512 0.74418605 0.79069767 0.79069767
|
|
0.88372093 0.86046512 0.60465116 0.69767442]
|
|
|
|
mean value: 0.7790697674418604
|
|
|
|
key: train_accuracy
|
|
value: [0.8630491 0.85788114 0.83979328 0.85529716 0.8501292 0.85529716
|
|
0.84237726 0.85529716 0.87080103 0.87080103]
|
|
|
|
mean value: 0.8560723514211886
|
|
|
|
key: test_fscore
|
|
value: [0.81818182 0.76595745 0.85714286 0.74418605 0.7804878 0.7804878
|
|
0.88888889 0.85714286 0.65306122 0.68292683]
|
|
|
|
mean value: 0.7828463578190746
|
|
|
|
key: train_fscore
|
|
value: [0.8630491 0.85714286 0.84102564 0.8556701 0.85128205 0.85492228
|
|
0.84398977 0.85416667 0.87113402 0.87309645]
|
|
|
|
mean value: 0.8565478931750017
|
|
|
|
key: test_precision
|
|
value: [0.7826087 0.69230769 0.85714286 0.72727273 0.8 0.84210526
|
|
0.86956522 0.9 0.59259259 0.73684211]
|
|
|
|
mean value: 0.78004371507804
|
|
|
|
key: train_precision
|
|
value: [0.86528497 0.86387435 0.83673469 0.8556701 0.84693878 0.85492228
|
|
0.83333333 0.85863874 0.86666667 0.85572139]
|
|
|
|
mean value: 0.853778530840661
|
|
|
|
key: test_recall
|
|
value: [0.85714286 0.85714286 0.85714286 0.76190476 0.76190476 0.72727273
|
|
0.90909091 0.81818182 0.72727273 0.63636364]
|
|
|
|
mean value: 0.7913419913419913
|
|
|
|
key: train_recall
|
|
value: [0.86082474 0.85051546 0.84536082 0.8556701 0.8556701 0.85492228
|
|
0.85492228 0.84974093 0.87564767 0.89119171]
|
|
|
|
mean value: 0.8594466107579724
|
|
|
|
key: test_roc_auc
|
|
value: [0.81493506 0.74675325 0.86038961 0.74458874 0.79004329 0.79220779
|
|
0.88311688 0.86147186 0.6017316 0.6991342 ]
|
|
|
|
mean value: 0.7794372294372295
|
|
|
|
key: train_roc_auc
|
|
value: [0.86305486 0.85790022 0.83977886 0.85529619 0.85011484 0.85529619
|
|
0.84240959 0.85528284 0.87081352 0.87085359]
|
|
|
|
mean value: 0.856080070509054
|
|
|
|
key: test_jcc
|
|
value: [0.69230769 0.62068966 0.75 0.59259259 0.64 0.64
|
|
0.8 0.75 0.48484848 0.51851852]
|
|
|
|
mean value: 0.6488956943439702
|
|
|
|
key: train_jcc
|
|
value: [0.75909091 0.75 0.72566372 0.74774775 0.74107143 0.74660633
|
|
0.7300885 0.74545455 0.7716895 0.77477477]
|
|
|
|
mean value: 0.749218745058731
|
|
|
|
MCC on Blind test: 0.39
|
|
|
|
Accuracy on Blind test: 0.7
|
|
|
|
Model_name: Logistic RegressionCV
|
|
Model func: LogisticRegressionCV(random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegressionCV(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [2.05319595 2.25262332 2.25992036 1.86790586 1.25109744 1.45810485
|
|
1.15412784 1.81505632 1.78134704 1.90901518]
|
|
|
|
mean value: 1.7802394151687622
|
|
|
|
key: score_time
|
|
value: [0.02578926 0.02598405 0.02641463 0.01519704 0.01256347 0.01458597
|
|
0.01378655 0.02464509 0.02310419 0.01562047]
|
|
|
|
mean value: 0.01976907253265381
|
|
|
|
key: test_mcc
|
|
value: [0.58225108 0.49456394 0.72077922 0.48917749 0.53463203 0.58824786
|
|
0.77418983 0.72451364 0.30265778 0.25490741]
|
|
|
|
mean value: 0.5465920274282123
|
|
|
|
key: train_mcc
|
|
value: [0.63313389 0.67962928 0.65393972 0.67974678 0.63325946 0.67488854
|
|
0.65375781 0.67451054 0.70543774 0.63857648]
|
|
|
|
mean value: 0.6626880247473126
|
|
|
|
key: test_accuracy
|
|
value: [0.79069767 0.74418605 0.86046512 0.74418605 0.76744186 0.79069767
|
|
0.88372093 0.86046512 0.65116279 0.62790698]
|
|
|
|
mean value: 0.772093023255814
|
|
|
|
key: train_accuracy
|
|
value: [0.81653747 0.83979328 0.82687339 0.83979328 0.81653747 0.8372093
|
|
0.82687339 0.8372093 0.85271318 0.81912145]
|
|
|
|
mean value: 0.831266149870801
|
|
|
|
key: test_fscore
|
|
value: [0.79069767 0.75555556 0.85714286 0.74418605 0.76190476 0.7804878
|
|
0.89361702 0.85714286 0.68085106 0.65217391]
|
|
|
|
mean value: 0.7773759555704174
|
|
|
|
key: train_fscore
|
|
value: [0.81841432 0.83937824 0.82951654 0.83854167 0.81933842 0.83969466
|
|
0.82687339 0.83804627 0.85271318 0.82142857]
|
|
|
|
mean value: 0.8323945252809524
|
|
|
|
key: test_precision
|
|
value: [0.77272727 0.70833333 0.85714286 0.72727273 0.76190476 0.84210526
|
|
0.84 0.9 0.64 0.625 ]
|
|
|
|
mean value: 0.7674486215538847
|
|
|
|
key: train_precision
|
|
value: [0.81218274 0.84375 0.81909548 0.84736842 0.80904523 0.825
|
|
0.82474227 0.83163265 0.85051546 0.80904523]
|
|
|
|
mean value: 0.8272377476837611
|
|
|
|
key: test_recall
|
|
value: [0.80952381 0.80952381 0.85714286 0.76190476 0.76190476 0.72727273
|
|
0.95454545 0.81818182 0.72727273 0.68181818]
|
|
|
|
mean value: 0.7909090909090909
|
|
|
|
key: train_recall
|
|
value: [0.82474227 0.83505155 0.84020619 0.82989691 0.82989691 0.85492228
|
|
0.82901554 0.84455959 0.85492228 0.83419689]
|
|
|
|
mean value: 0.837741039474387
|
|
|
|
key: test_roc_auc
|
|
value: [0.79112554 0.745671 0.86038961 0.74458874 0.76731602 0.79220779
|
|
0.88203463 0.86147186 0.64935065 0.62662338]
|
|
|
|
mean value: 0.7720779220779221
|
|
|
|
key: train_roc_auc
|
|
value: [0.81651621 0.83980557 0.82683884 0.83981892 0.81650286 0.83725495
|
|
0.82687891 0.83722825 0.85271887 0.8191603 ]
|
|
|
|
mean value: 0.8312723679290636
|
|
|
|
key: test_jcc
|
|
value: [0.65384615 0.60714286 0.75 0.59259259 0.61538462 0.64
|
|
0.80769231 0.75 0.51612903 0.48387097]
|
|
|
|
mean value: 0.6416658526658526
|
|
|
|
key: train_jcc
|
|
value: [0.69264069 0.72321429 0.70869565 0.72197309 0.69396552 0.72368421
|
|
0.70484581 0.72123894 0.74324324 0.6969697 ]
|
|
|
|
mean value: 0.7130471145711001
|
|
|
|
MCC on Blind test: 0.45
|
|
|
|
Accuracy on Blind test: 0.73
|
|
|
|
Model_name: Gaussian NB
|
|
Model func: GaussianNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianNB())])
|
|
|
|
key: fit_time
|
|
value: [0.0431459 0.01635933 0.01623845 0.01651263 0.01639867 0.01648521
|
|
0.01640391 0.01852489 0.01559401 0.01632786]
|
|
|
|
mean value: 0.019199085235595704
|
|
|
|
key: score_time
|
|
value: [0.01493192 0.01494384 0.0147562 0.01472116 0.0147326 0.01476002
|
|
0.01519728 0.0136559 0.0147171 0.01477838]
|
|
|
|
mean value: 0.014719438552856446
|
|
|
|
key: test_mcc
|
|
value: [0.3071961 0.26318068 0.55959928 0.55959928 0.27394005 0.30666041
|
|
0.34318385 0.25490741 0.35868355 0.35185603]
|
|
|
|
mean value: 0.35788066273155245
|
|
|
|
key: train_mcc
|
|
value: [0.42261743 0.43687627 0.37528635 0.39033754 0.41539874 0.37697052
|
|
0.3880617 0.40675265 0.42447646 0.41278926]
|
|
|
|
mean value: 0.4049566920759974
|
|
|
|
key: test_accuracy
|
|
value: [0.65116279 0.62790698 0.76744186 0.76744186 0.60465116 0.65116279
|
|
0.65116279 0.62790698 0.6744186 0.6744186 ]
|
|
|
|
mean value: 0.6697674418604651
|
|
|
|
key: train_accuracy
|
|
value: [0.70542636 0.71059432 0.67700258 0.6873385 0.7002584 0.67958656
|
|
0.66925065 0.69509044 0.70542636 0.69767442]
|
|
|
|
mean value: 0.692764857881137
|
|
|
|
key: test_fscore
|
|
value: [0.66666667 0.65217391 0.79166667 0.79166667 0.69090909 0.69387755
|
|
0.72727273 0.65217391 0.72 0.70833333]
|
|
|
|
mean value: 0.7094740528622516
|
|
|
|
key: train_fscore
|
|
value: [0.73732719 0.74545455 0.72406181 0.7268623 0.73636364 0.72072072
|
|
0.73333333 0.73181818 0.73732719 0.73469388]
|
|
|
|
mean value: 0.7327962785759218
|
|
|
|
key: test_precision
|
|
value: [0.625 0.6 0.7037037 0.7037037 0.55882353 0.62962963
|
|
0.60606061 0.625 0.64285714 0.65384615]
|
|
|
|
mean value: 0.6348624469212705
|
|
|
|
key: train_precision
|
|
value: [0.66666667 0.66666667 0.63320463 0.64658635 0.65853659 0.6374502
|
|
0.61324042 0.65182186 0.66390041 0.65322581]
|
|
|
|
mean value: 0.6491299598344551
|
|
|
|
key: test_recall
|
|
value: [0.71428571 0.71428571 0.9047619 0.9047619 0.9047619 0.77272727
|
|
0.90909091 0.68181818 0.81818182 0.77272727]
|
|
|
|
mean value: 0.8097402597402598
|
|
|
|
key: train_recall
|
|
value: [0.82474227 0.84536082 0.84536082 0.82989691 0.83505155 0.82901554
|
|
0.9119171 0.83419689 0.82901554 0.83937824]
|
|
|
|
mean value: 0.8423935687196197
|
|
|
|
key: test_roc_auc
|
|
value: [0.6525974 0.62987013 0.77056277 0.77056277 0.61147186 0.6482684
|
|
0.64502165 0.62662338 0.67099567 0.67207792]
|
|
|
|
mean value: 0.6698051948051948
|
|
|
|
key: train_roc_auc
|
|
value: [0.70511725 0.71024518 0.67656642 0.68696918 0.69990919 0.67997169
|
|
0.66987607 0.69544896 0.70574489 0.69803963]
|
|
|
|
mean value: 0.6927888467496394
|
|
|
|
key: test_jcc
|
|
value: [0.5 0.48387097 0.65517241 0.65517241 0.52777778 0.53125
|
|
0.57142857 0.48387097 0.5625 0.5483871 ]
|
|
|
|
mean value: 0.5519430209050621
|
|
|
|
key: train_jcc
|
|
value: [0.58394161 0.5942029 0.56747405 0.57092199 0.58273381 0.56338028
|
|
0.57894737 0.57706093 0.58394161 0.58064516]
|
|
|
|
mean value: 0.5783249700738864
|
|
|
|
MCC on Blind test: 0.32
|
|
|
|
Accuracy on Blind test: 0.67
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.0163424 0.01679897 0.01675701 0.01685286 0.016855 0.01681352
|
|
0.01682734 0.01688409 0.01697421 0.01698279]
|
|
|
|
mean value: 0.01680881977081299
|
|
|
|
key: score_time
|
|
value: [0.01396537 0.01491928 0.01495099 0.01483655 0.01477695 0.01482654
|
|
0.01482534 0.01485395 0.0148468 0.01482868]
|
|
|
|
mean value: 0.01476304531097412
|
|
|
|
key: test_mcc
|
|
value: [0.35141081 0.25490741 0.53595916 0.4517935 0.31423621 0.48807056
|
|
0.4912706 0.2581351 0.44701207 0.3961039 ]
|
|
|
|
mean value: 0.3988899295731957
|
|
|
|
key: train_mcc
|
|
value: [0.50010976 0.51422314 0.45901322 0.48339175 0.48864075 0.49401307
|
|
0.46826734 0.50904089 0.47386097 0.47386097]
|
|
|
|
mean value: 0.48644218743566964
|
|
|
|
key: test_accuracy
|
|
value: [0.6744186 0.62790698 0.76744186 0.72093023 0.65116279 0.74418605
|
|
0.74418605 0.62790698 0.72093023 0.69767442]
|
|
|
|
mean value: 0.6976744186046512
|
|
|
|
key: train_accuracy
|
|
value: [0.74935401 0.75710594 0.72868217 0.74160207 0.74418605 0.74677003
|
|
0.73385013 0.75452196 0.73643411 0.73643411]
|
|
|
|
mean value: 0.7428940568475452
|
|
|
|
key: test_fscore
|
|
value: [0.68181818 0.6 0.75 0.73913043 0.68085106 0.75555556
|
|
0.76595745 0.61904762 0.75 0.69767442]
|
|
|
|
mean value: 0.7040034720446915
|
|
|
|
key: train_fscore
|
|
value: [0.75930521 0.75897436 0.74074074 0.74619289 0.74936709 0.75126904
|
|
0.73924051 0.75324675 0.74371859 0.74371859]
|
|
|
|
mean value: 0.7485773773680334
|
|
|
|
key: test_precision
|
|
value: [0.65217391 0.63157895 0.78947368 0.68 0.61538462 0.73913043
|
|
0.72 0.65 0.69230769 0.71428571]
|
|
|
|
mean value: 0.6884335001383056
|
|
|
|
key: train_precision
|
|
value: [0.73205742 0.75510204 0.71090047 0.735 0.73631841 0.73631841
|
|
0.72277228 0.75520833 0.72195122 0.72195122]
|
|
|
|
mean value: 0.7327579796523762
|
|
|
|
key: test_recall
|
|
value: [0.71428571 0.57142857 0.71428571 0.80952381 0.76190476 0.77272727
|
|
0.81818182 0.59090909 0.81818182 0.68181818]
|
|
|
|
mean value: 0.7253246753246754
|
|
|
|
key: train_recall
|
|
value: [0.78865979 0.7628866 0.77319588 0.75773196 0.7628866 0.76683938
|
|
0.75647668 0.75129534 0.76683938 0.76683938]
|
|
|
|
mean value: 0.7653650980182682
|
|
|
|
key: test_roc_auc
|
|
value: [0.67532468 0.62662338 0.76623377 0.72294372 0.65367965 0.74350649
|
|
0.74242424 0.62878788 0.71861472 0.69805195]
|
|
|
|
mean value: 0.6976190476190477
|
|
|
|
key: train_roc_auc
|
|
value: [0.74925218 0.75709097 0.72856685 0.74156028 0.7441376 0.74682175
|
|
0.73390845 0.75451365 0.73651247 0.73651247]
|
|
|
|
mean value: 0.7428876662571444
|
|
|
|
key: test_jcc
|
|
value: [0.51724138 0.42857143 0.6 0.5862069 0.51612903 0.60714286
|
|
0.62068966 0.44827586 0.6 0.53571429]
|
|
|
|
mean value: 0.5459971396790084
|
|
|
|
key: train_jcc
|
|
value: [0.612 0.61157025 0.58823529 0.5951417 0.59919028 0.60162602
|
|
0.58634538 0.60416667 0.592 0.592 ]
|
|
|
|
mean value: 0.5982275590310133
|
|
|
|
MCC on Blind test: 0.48
|
|
|
|
Accuracy on Blind test: 0.73
|
|
|
|
Model_name: K-Nearest Neighbors
|
|
Model func: KNeighborsClassifier()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', KNeighborsClassifier())])
|
|
|
|
key: fit_time
|
|
value: [0.01586819 0.0141995 0.02298093 0.02030969 0.01366186 0.0336926
|
|
0.01359272 0.03401446 0.01413941 0.02369332]
|
|
|
|
mean value: 0.020615267753601074
|
|
|
|
key: score_time
|
|
value: [0.03825784 0.03617787 0.05568075 0.05602121 0.04199529 0.04548907
|
|
0.04207611 0.04852986 0.06230307 0.03881931]
|
|
|
|
mean value: 0.04653503894805908
|
|
|
|
key: test_mcc
|
|
value: [0.34859132 0.3030303 0.44155844 0.35748709 0.16887427 0.2581351
|
|
0.25541126 0.16485939 0.11404496 0.35748709]
|
|
|
|
mean value: 0.2769479215358097
|
|
|
|
key: train_mcc
|
|
value: [0.52475891 0.4780321 0.49373354 0.48437399 0.52463135 0.48366935
|
|
0.52454463 0.51449492 0.55103485 0.49958596]
|
|
|
|
mean value: 0.5078859601431154
|
|
|
|
key: test_accuracy
|
|
value: [0.6744186 0.65116279 0.72093023 0.6744186 0.58139535 0.62790698
|
|
0.62790698 0.58139535 0.55813953 0.6744186 ]
|
|
|
|
mean value: 0.6372093023255814
|
|
|
|
key: train_accuracy
|
|
value: [0.7622739 0.73901809 0.74677003 0.74160207 0.7622739 0.74160207
|
|
0.7622739 0.75710594 0.7751938 0.74935401]
|
|
|
|
mean value: 0.7537467700258398
|
|
|
|
key: test_fscore
|
|
value: [0.65 0.65116279 0.71428571 0.69565217 0.60869565 0.61904762
|
|
0.63636364 0.57142857 0.59574468 0.65 ]
|
|
|
|
mean value: 0.6392380838761236
|
|
|
|
key: train_fscore
|
|
value: [0.76649746 0.7403599 0.75126904 0.75124378 0.76530612 0.74619289
|
|
0.76165803 0.76020408 0.77974684 0.75566751]
|
|
|
|
mean value: 0.757814564603969
|
|
|
|
key: test_precision
|
|
value: [0.68421053 0.63636364 0.71428571 0.64 0.56 0.65
|
|
0.63636364 0.6 0.56 0.72222222]
|
|
|
|
mean value: 0.6403445735550999
|
|
|
|
key: train_precision
|
|
value: [0.755 0.73846154 0.74 0.72596154 0.75757576 0.73134328
|
|
0.76165803 0.74874372 0.76237624 0.73529412]
|
|
|
|
mean value: 0.7456414223032793
|
|
|
|
key: test_recall
|
|
value: [0.61904762 0.66666667 0.71428571 0.76190476 0.66666667 0.59090909
|
|
0.63636364 0.54545455 0.63636364 0.59090909]
|
|
|
|
mean value: 0.6428571428571428
|
|
|
|
key: train_recall
|
|
value: [0.77835052 0.74226804 0.7628866 0.77835052 0.77319588 0.76165803
|
|
0.76165803 0.77202073 0.79792746 0.77720207]
|
|
|
|
mean value: 0.7705517867635276
|
|
|
|
key: test_roc_auc
|
|
value: [0.67316017 0.65151515 0.72077922 0.67640693 0.58333333 0.62878788
|
|
0.62770563 0.58225108 0.55627706 0.67640693]
|
|
|
|
mean value: 0.6376623376623376
|
|
|
|
key: train_roc_auc
|
|
value: [0.76223225 0.73900967 0.74672827 0.74150686 0.76224561 0.74165376
|
|
0.76227231 0.75714438 0.77525239 0.74942578]
|
|
|
|
mean value: 0.7537471288926874
|
|
|
|
key: test_jcc
|
|
value: [0.48148148 0.48275862 0.55555556 0.53333333 0.4375 0.44827586
|
|
0.46666667 0.4 0.42424242 0.48148148]
|
|
|
|
mean value: 0.4711295425519563
|
|
|
|
key: train_jcc
|
|
value: [0.62139918 0.5877551 0.60162602 0.60159363 0.61983471 0.5951417
|
|
0.61506276 0.61316872 0.63900415 0.60728745]
|
|
|
|
mean value: 0.6101873416458796
|
|
|
|
MCC on Blind test: 0.15
|
|
|
|
Accuracy on Blind test: 0.58
|
|
|
|
Model_name: SVM
|
|
Model func: SVC(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SVC(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02942824 0.03070235 0.03118563 0.03116775 0.03088856 0.03088307
|
|
0.03110266 0.03078842 0.03027916 0.03081298]
|
|
|
|
mean value: 0.030723881721496583
|
|
|
|
key: score_time
|
|
value: [0.01875687 0.01874995 0.0188489 0.01890826 0.01899743 0.01892638
|
|
0.0193913 0.01888132 0.01872396 0.01879382]
|
|
|
|
mean value: 0.018897819519042968
|
|
|
|
key: test_mcc
|
|
value: [0.58225108 0.58824786 0.72077922 0.58824786 0.4517935 0.58824786
|
|
0.58557701 0.55959928 0.41330345 0.34859132]
|
|
|
|
mean value: 0.5426638435616064
|
|
|
|
key: train_mcc
|
|
value: [0.72193009 0.70123323 0.69518417 0.71095739 0.69047778 0.69557211
|
|
0.72617268 0.7109111 0.71656987 0.71625569]
|
|
|
|
mean value: 0.7085264102208928
|
|
|
|
key: test_accuracy
|
|
value: [0.79069767 0.79069767 0.86046512 0.79069767 0.72093023 0.79069767
|
|
0.79069767 0.76744186 0.69767442 0.6744186 ]
|
|
|
|
mean value: 0.7674418604651163
|
|
|
|
key: train_accuracy
|
|
value: [0.86046512 0.8501292 0.84754522 0.85529716 0.84496124 0.84754522
|
|
0.8630491 0.85529716 0.85788114 0.85788114]
|
|
|
|
mean value: 0.8540051679586563
|
|
|
|
key: test_fscore
|
|
value: [0.79069767 0.8 0.85714286 0.8 0.73913043 0.7804878
|
|
0.80851064 0.73684211 0.74509804 0.69565217]
|
|
|
|
mean value: 0.775356172791188
|
|
|
|
key: train_fscore
|
|
value: [0.85714286 0.84656085 0.84675325 0.85340314 0.84848485 0.84987277
|
|
0.8616188 0.85263158 0.86075949 0.86005089]
|
|
|
|
mean value: 0.853727847599906
|
|
|
|
key: test_precision
|
|
value: [0.77272727 0.75 0.85714286 0.75 0.68 0.84210526
|
|
0.76 0.875 0.65517241 0.66666667]
|
|
|
|
mean value: 0.7608814473487795
|
|
|
|
key: train_precision
|
|
value: [0.88043478 0.86956522 0.85340314 0.86702128 0.83168317 0.835
|
|
0.86842105 0.86631016 0.84158416 0.845 ]
|
|
|
|
mean value: 0.8558422957749061
|
|
|
|
key: test_recall
|
|
value: [0.80952381 0.85714286 0.85714286 0.85714286 0.80952381 0.72727273
|
|
0.86363636 0.63636364 0.86363636 0.72727273]
|
|
|
|
mean value: 0.8008658008658008
|
|
|
|
key: train_recall
|
|
value: [0.83505155 0.82474227 0.84020619 0.84020619 0.86597938 0.86528497
|
|
0.85492228 0.83937824 0.88082902 0.87564767]
|
|
|
|
mean value: 0.8522247743176112
|
|
|
|
key: test_roc_auc
|
|
value: [0.79112554 0.79220779 0.86038961 0.79220779 0.72294372 0.79220779
|
|
0.78896104 0.77056277 0.69372294 0.67316017]
|
|
|
|
mean value: 0.7677489177489177
|
|
|
|
key: train_roc_auc
|
|
value: [0.86053095 0.85019497 0.84756423 0.85533625 0.84490679 0.84759094
|
|
0.86302815 0.85525613 0.85794028 0.85792693]
|
|
|
|
mean value: 0.8540275626302014
|
|
|
|
key: test_jcc
|
|
value: [0.65384615 0.66666667 0.75 0.66666667 0.5862069 0.64
|
|
0.67857143 0.58333333 0.59375 0.53333333]
|
|
|
|
mean value: 0.6352374478969307
|
|
|
|
key: train_jcc
|
|
value: [0.75 0.73394495 0.73423423 0.74429224 0.73684211 0.73893805
|
|
0.75688073 0.74311927 0.75555556 0.75446429]
|
|
|
|
mean value: 0.7448271425435942
|
|
|
|
MCC on Blind test: 0.45
|
|
|
|
Accuracy on Blind test: 0.73
|
|
|
|
Model_name: MLP
|
|
Model func: MLPClassifier(max_iter=500, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MLPClassifier(max_iter=500, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [3.31610084 3.66471815 3.69096637 2.90685415 3.13831234 3.5732975
|
|
3.3353126 3.1690793 3.41401672 2.58294654]
|
|
|
|
mean value: 3.2791604518890383
|
|
|
|
key: score_time
|
|
value: [0.02754307 0.02450562 0.03252292 0.0326395 0.03124905 0.02395105
|
|
0.0236342 0.03183818 0.03920364 0.01291871]
|
|
|
|
mean value: 0.028000593185424805
|
|
|
|
key: test_mcc
|
|
value: [0.62770563 0.58824786 0.4912706 0.58134627 0.44227524 0.79001638
|
|
0.72077922 0.68193178 0.44227524 0.40088002]
|
|
|
|
mean value: 0.5766728235571799
|
|
|
|
key: train_mcc
|
|
value: [0.9741727 0.96899204 0.9638374 0.97417339 0.96899204 0.9638374
|
|
0.96904463 0.97417339 0.98450896 0.98461498]
|
|
|
|
mean value: 0.9726346929348463
|
|
|
|
key: test_accuracy
|
|
value: [0.81395349 0.79069767 0.74418605 0.79069767 0.72093023 0.88372093
|
|
0.86046512 0.8372093 0.72093023 0.69767442]
|
|
|
|
mean value: 0.786046511627907
|
|
|
|
key: train_accuracy
|
|
value: [0.9870801 0.98449612 0.98191214 0.9870801 0.98449612 0.98191214
|
|
0.98449612 0.9870801 0.99224806 0.99224806]
|
|
|
|
mean value: 0.9863049095607235
|
|
|
|
key: test_fscore
|
|
value: [0.80952381 0.8 0.71794872 0.7804878 0.7 0.87179487
|
|
0.86363636 0.82926829 0.73913043 0.68292683]
|
|
|
|
mean value: 0.779471712451564
|
|
|
|
key: train_fscore
|
|
value: [0.98714653 0.98453608 0.98191214 0.9870801 0.98453608 0.98191214
|
|
0.98453608 0.9870801 0.99220779 0.99228792]
|
|
|
|
mean value: 0.9863234983055275
|
|
|
|
key: test_precision
|
|
value: [0.80952381 0.75 0.77777778 0.8 0.73684211 1.
|
|
0.86363636 0.89473684 0.70833333 0.73684211]
|
|
|
|
mean value: 0.8077692336902863
|
|
|
|
key: train_precision
|
|
value: [0.98461538 0.98453608 0.98445596 0.98963731 0.98453608 0.97938144
|
|
0.97948718 0.98453608 0.99479167 0.98469388]
|
|
|
|
mean value: 0.9850671063290606
|
|
|
|
key: test_recall
|
|
value: [0.80952381 0.85714286 0.66666667 0.76190476 0.66666667 0.77272727
|
|
0.86363636 0.77272727 0.77272727 0.63636364]
|
|
|
|
mean value: 0.758008658008658
|
|
|
|
key: train_recall
|
|
value: [0.98969072 0.98453608 0.97938144 0.98453608 0.98453608 0.98445596
|
|
0.98963731 0.98963731 0.98963731 1. ]
|
|
|
|
mean value: 0.9876048288018803
|
|
|
|
key: test_roc_auc
|
|
value: [0.81385281 0.79220779 0.74242424 0.79004329 0.71969697 0.88636364
|
|
0.86038961 0.83874459 0.71969697 0.6991342 ]
|
|
|
|
mean value: 0.7862554112554112
|
|
|
|
key: train_roc_auc
|
|
value: [0.98707334 0.98449602 0.9819187 0.98708669 0.98449602 0.9819187
|
|
0.98450937 0.98708669 0.99224133 0.99226804]
|
|
|
|
mean value: 0.9863094920143155
|
|
|
|
key: test_jcc
|
|
value: [0.68 0.66666667 0.56 0.64 0.53846154 0.77272727
|
|
0.76 0.70833333 0.5862069 0.51851852]
|
|
|
|
mean value: 0.6430914226259054
|
|
|
|
key: train_jcc
|
|
value: [0.97461929 0.96954315 0.96446701 0.9744898 0.96954315 0.96446701
|
|
0.96954315 0.9744898 0.98453608 0.98469388]
|
|
|
|
mean value: 0.9730392292978733
|
|
|
|
MCC on Blind test: 0.34
|
|
|
|
Accuracy on Blind test: 0.67
|
|
|
|
Model_name: Decision Tree
|
|
Model func: DecisionTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', DecisionTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.03067994 0.02926111 0.02483511 0.0242579 0.03077769 0.02583909
|
|
0.02731919 0.02467728 0.02925134 0.02427554]
|
|
|
|
mean value: 0.027117419242858886
|
|
|
|
key: score_time
|
|
value: [0.01235723 0.01009178 0.00929546 0.00893426 0.0089798 0.00907421
|
|
0.00918913 0.00918341 0.00910568 0.00909543]
|
|
|
|
mean value: 0.0095306396484375
|
|
|
|
key: test_mcc
|
|
value: [0.53463203 0.53463203 0.72451364 0.53595916 0.53796222 0.64040632
|
|
0.34848485 0.44468651 0.40088002 0.26318068]
|
|
|
|
mean value: 0.4965337462592751
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.76744186 0.76744186 0.86046512 0.76744186 0.76744186 0.81395349
|
|
0.6744186 0.72093023 0.69767442 0.62790698]
|
|
|
|
mean value: 0.7465116279069768
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.76190476 0.76190476 0.86363636 0.75 0.77272727 0.8
|
|
0.68181818 0.71428571 0.68292683 0.6 ]
|
|
|
|
mean value: 0.7389203885545349
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.76190476 0.76190476 0.82608696 0.78947368 0.73913043 0.88888889
|
|
0.68181818 0.75 0.73684211 0.66666667]
|
|
|
|
mean value: 0.7602716441961292
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.76190476 0.76190476 0.9047619 0.71428571 0.80952381 0.72727273
|
|
0.68181818 0.68181818 0.63636364 0.54545455]
|
|
|
|
mean value: 0.7225108225108225
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.76731602 0.76731602 0.86147186 0.76623377 0.76839827 0.81601732
|
|
0.67424242 0.72186147 0.6991342 0.62987013]
|
|
|
|
mean value: 0.7471861471861472
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.61538462 0.61538462 0.76 0.6 0.62962963 0.66666667
|
|
0.51724138 0.55555556 0.51851852 0.42857143]
|
|
|
|
mean value: 0.5906952409021374
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.47
|
|
|
|
Accuracy on Blind test: 0.73
|
|
|
|
Model_name: Extra Trees
|
|
Model func: ExtraTreesClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreesClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.11986899 0.12324095 0.12095547 0.11996508 0.12497091 0.11812854
|
|
0.11927009 0.11958623 0.12523746 0.1254375 ]
|
|
|
|
mean value: 0.12166612148284912
|
|
|
|
key: score_time
|
|
value: [0.01776338 0.0179534 0.01831174 0.0179131 0.01776171 0.01782417
|
|
0.01797843 0.01762795 0.01887631 0.01881528]
|
|
|
|
mean value: 0.018082547187805175
|
|
|
|
key: test_mcc
|
|
value: [0.53595916 0.58557701 0.72077922 0.63123793 0.48917749 0.58557701
|
|
0.48807056 0.58225108 0.44701207 0.4633482 ]
|
|
|
|
mean value: 0.5528989731062869
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.76744186 0.79069767 0.86046512 0.81395349 0.74418605 0.79069767
|
|
0.74418605 0.79069767 0.72093023 0.72093023]
|
|
|
|
mean value: 0.7744186046511627
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.75 0.76923077 0.85714286 0.81818182 0.74418605 0.80851064
|
|
0.75555556 0.79069767 0.75 0.68421053]
|
|
|
|
mean value: 0.7727715885654894
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.78947368 0.83333333 0.85714286 0.7826087 0.72727273 0.76
|
|
0.73913043 0.80952381 0.69230769 0.8125 ]
|
|
|
|
mean value: 0.7803293234225729
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.71428571 0.71428571 0.85714286 0.85714286 0.76190476 0.86363636
|
|
0.77272727 0.77272727 0.81818182 0.59090909]
|
|
|
|
mean value: 0.7722943722943723
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.76623377 0.78896104 0.86038961 0.81493506 0.74458874 0.78896104
|
|
0.74350649 0.79112554 0.71861472 0.72402597]
|
|
|
|
mean value: 0.7741341991341991
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.6 0.625 0.75 0.69230769 0.59259259 0.67857143
|
|
0.60714286 0.65384615 0.6 0.52 ]
|
|
|
|
mean value: 0.6319460724460725
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.45
|
|
|
|
Accuracy on Blind test: 0.73
|
|
|
|
Model_name: Extra Tree
|
|
Model func: ExtraTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.0113728 0.01004457 0.01123762 0.01008534 0.01026392 0.01027369
|
|
0.01014042 0.01014757 0.01015282 0.01003885]
|
|
|
|
mean value: 0.010375761985778808
|
|
|
|
key: score_time
|
|
value: [0.00955272 0.00899649 0.00898719 0.00902295 0.00880337 0.00935388
|
|
0.00888276 0.00890851 0.00882626 0.0089283 ]
|
|
|
|
mean value: 0.009026241302490235
|
|
|
|
key: test_mcc
|
|
value: [0.07158368 0.20824344 0.45629995 0.48917749 0.39696419 0.16887427
|
|
0.58225108 0.58134627 0.58134627 0.21908017]
|
|
|
|
mean value: 0.3755166816788424
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.53488372 0.60465116 0.72093023 0.74418605 0.69767442 0.58139535
|
|
0.79069767 0.79069767 0.79069767 0.60465116]
|
|
|
|
mean value: 0.686046511627907
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.54545455 0.58536585 0.66666667 0.74418605 0.66666667 0.55
|
|
0.79069767 0.8 0.8 0.56410256]
|
|
|
|
mean value: 0.6713140017479212
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.52173913 0.6 0.8 0.72727273 0.72222222 0.61111111
|
|
0.80952381 0.7826087 0.7826087 0.64705882]
|
|
|
|
mean value: 0.7004145215398413
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.57142857 0.57142857 0.57142857 0.76190476 0.61904762 0.5
|
|
0.77272727 0.81818182 0.81818182 0.5 ]
|
|
|
|
mean value: 0.6504329004329005
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.53571429 0.6038961 0.71753247 0.74458874 0.69588745 0.58333333
|
|
0.79112554 0.79004329 0.79004329 0.60714286]
|
|
|
|
mean value: 0.685930735930736
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.375 0.4137931 0.5 0.59259259 0.5 0.37931034
|
|
0.65384615 0.66666667 0.66666667 0.39285714]
|
|
|
|
mean value: 0.5140732670905085
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.57
|
|
|
|
Accuracy on Blind test: 0.79
|
|
|
|
Model_name: Random Forest
|
|
Model func: RandomForestClassifier(n_estimators=1000, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(n_estimators=1000, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.72484875 1.69839096 1.69777799 1.69739676 1.70870376 1.71582556
|
|
1.71650362 1.70737052 1.68216228 1.69348478]
|
|
|
|
mean value: 1.704246497154236
|
|
|
|
key: score_time
|
|
value: [0.09137702 0.09932709 0.09243751 0.09427667 0.093925 0.09772754
|
|
0.0989058 0.09075212 0.09389806 0.09104729]
|
|
|
|
mean value: 0.0943674087524414
|
|
|
|
key: test_mcc
|
|
value: [0.62964308 0.48917749 0.86147186 0.81778934 0.67462198 0.76789769
|
|
0.68193178 0.58824786 0.44701207 0.51986413]
|
|
|
|
mean value: 0.647765726359847
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.81395349 0.74418605 0.93023256 0.90697674 0.8372093 0.88372093
|
|
0.8372093 0.79069767 0.72093023 0.74418605]
|
|
|
|
mean value: 0.8209302325581396
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.8 0.74418605 0.93023256 0.90909091 0.82926829 0.88888889
|
|
0.82926829 0.7804878 0.75 0.7027027 ]
|
|
|
|
mean value: 0.8164125495577567
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.84210526 0.72727273 0.90909091 0.86956522 0.85 0.86956522
|
|
0.89473684 0.84210526 0.69230769 0.86666667]
|
|
|
|
mean value: 0.8363415798541657
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.76190476 0.76190476 0.95238095 0.95238095 0.80952381 0.90909091
|
|
0.77272727 0.72727273 0.81818182 0.59090909]
|
|
|
|
mean value: 0.8056277056277056
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.81277056 0.74458874 0.93073593 0.90800866 0.83658009 0.88311688
|
|
0.83874459 0.79220779 0.71861472 0.7478355 ]
|
|
|
|
mean value: 0.8213203463203463
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.66666667 0.59259259 0.86956522 0.83333333 0.70833333 0.8
|
|
0.70833333 0.64 0.6 0.54166667]
|
|
|
|
mean value: 0.696049114331723
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.57
|
|
|
|
Accuracy on Blind test: 0.79
|
|
|
|
Model_name: Random Forest2
|
|
Model func: RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'Z...05', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10,
|
|
oob_score=True, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.95613575 1.00466895 0.9780705 0.99141526 1.00166965 1.02008724
|
|
0.98691678 1.09668422 1.02446008 1.00150752]
|
|
|
|
mean value: 1.006161594390869
|
|
|
|
key: score_time
|
|
value: [0.22908759 0.2125628 0.1826551 0.24825335 0.14801216 0.20393419
|
|
0.1265645 0.21994519 0.20560575 0.27162194]
|
|
|
|
mean value: 0.20482425689697265
|
|
|
|
key: test_mcc
|
|
value: [0.67462198 0.48917749 0.81778934 0.72077922 0.76789769 0.72077922
|
|
0.67532468 0.50454827 0.48807056 0.51986413]
|
|
|
|
mean value: 0.6378852573856032
|
|
|
|
key: train_mcc
|
|
value: [0.89158365 0.8914826 0.88630415 0.8914826 0.87081007 0.88635453
|
|
0.88635453 0.87596816 0.88143837 0.89683728]
|
|
|
|
mean value: 0.8858615950471861
|
|
|
|
key: test_accuracy
|
|
value: [0.8372093 0.74418605 0.90697674 0.86046512 0.88372093 0.86046512
|
|
0.8372093 0.74418605 0.74418605 0.74418605]
|
|
|
|
mean value: 0.8162790697674418
|
|
|
|
key: train_accuracy
|
|
value: [0.94573643 0.94573643 0.94315245 0.94573643 0.93540052 0.94315245
|
|
0.94315245 0.9379845 0.94056848 0.94832041]
|
|
|
|
mean value: 0.9428940568475452
|
|
|
|
key: test_fscore
|
|
value: [0.82926829 0.74418605 0.90909091 0.85714286 0.87804878 0.86363636
|
|
0.8372093 0.71794872 0.75555556 0.7027027 ]
|
|
|
|
mean value: 0.8094789528085047
|
|
|
|
key: train_fscore
|
|
value: [0.94545455 0.94601542 0.94329897 0.94601542 0.93573265 0.94329897
|
|
0.94329897 0.93782383 0.94117647 0.94871795]
|
|
|
|
mean value: 0.9430833202318074
|
|
|
|
key: test_precision
|
|
value: [0.85 0.72727273 0.86956522 0.85714286 0.9 0.86363636
|
|
0.85714286 0.82352941 0.73913043 0.86666667]
|
|
|
|
mean value: 0.835408653580009
|
|
|
|
key: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
train_precision
|
|
value: [0.95287958 0.94358974 0.94329897 0.94358974 0.93333333 0.93846154
|
|
0.93846154 0.93782383 0.92929293 0.93908629]
|
|
|
|
mean value: 0.9399817505565959
|
|
|
|
key: test_recall
|
|
value: [0.80952381 0.76190476 0.95238095 0.85714286 0.85714286 0.86363636
|
|
0.81818182 0.63636364 0.77272727 0.59090909]
|
|
|
|
mean value: 0.791991341991342
|
|
|
|
key: train_recall
|
|
value: [0.93814433 0.94845361 0.94329897 0.94845361 0.93814433 0.94818653
|
|
0.94818653 0.93782383 0.95336788 0.95854922]
|
|
|
|
mean value: 0.9462608834998131
|
|
|
|
key: test_roc_auc
|
|
value: [0.83658009 0.74458874 0.90800866 0.86038961 0.88311688 0.86038961
|
|
0.83766234 0.74675325 0.74350649 0.7478355 ]
|
|
|
|
mean value: 0.8168831168831169
|
|
|
|
key: train_roc_auc
|
|
value: [0.9457561 0.94572939 0.94315208 0.94572939 0.93539341 0.94316543
|
|
0.94316543 0.93798408 0.94060146 0.94834678]
|
|
|
|
mean value: 0.9429023556433951
|
|
|
|
key: test_jcc
|
|
value: [0.70833333 0.59259259 0.83333333 0.75 0.7826087 0.76
|
|
0.72 0.56 0.60714286 0.54166667]
|
|
|
|
mean value: 0.6855677478720957
|
|
|
|
key: train_jcc
|
|
value: [0.89655172 0.89756098 0.89268293 0.89756098 0.87922705 0.89268293
|
|
0.89268293 0.88292683 0.88888889 0.90243902]
|
|
|
|
mean value: 0.892320425153277
|
|
|
|
MCC on Blind test: 0.51
|
|
|
|
Accuracy on Blind test: 0.76
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01284814 0.01265669 0.012532 0.01183605 0.01195717 0.01215744
|
|
0.01211309 0.01218128 0.01174545 0.01174068]
|
|
|
|
mean value: 0.012176799774169921
|
|
|
|
key: score_time
|
|
value: [0.01120496 0.01093507 0.01121926 0.01031542 0.01077127 0.01087213
|
|
0.01022887 0.01016021 0.01030135 0.01021433]
|
|
|
|
mean value: 0.010622286796569824
|
|
|
|
key: test_mcc
|
|
value: [0.35141081 0.25490741 0.53595916 0.4517935 0.31423621 0.48807056
|
|
0.4912706 0.2581351 0.44701207 0.3961039 ]
|
|
|
|
mean value: 0.3988899295731957
|
|
|
|
key: train_mcc
|
|
value: [0.50010976 0.51422314 0.45901322 0.48339175 0.48864075 0.49401307
|
|
0.46826734 0.50904089 0.47386097 0.47386097]
|
|
|
|
mean value: 0.48644218743566964
|
|
|
|
key: test_accuracy
|
|
value: [0.6744186 0.62790698 0.76744186 0.72093023 0.65116279 0.74418605
|
|
0.74418605 0.62790698 0.72093023 0.69767442]
|
|
|
|
mean value: 0.6976744186046512
|
|
|
|
key: train_accuracy
|
|
value: [0.74935401 0.75710594 0.72868217 0.74160207 0.74418605 0.74677003
|
|
0.73385013 0.75452196 0.73643411 0.73643411]
|
|
|
|
mean value: 0.7428940568475452
|
|
|
|
key: test_fscore
|
|
value: [0.68181818 0.6 0.75 0.73913043 0.68085106 0.75555556
|
|
0.76595745 0.61904762 0.75 0.69767442]
|
|
|
|
mean value: 0.7040034720446915
|
|
|
|
key: train_fscore
|
|
value: [0.75930521 0.75897436 0.74074074 0.74619289 0.74936709 0.75126904
|
|
0.73924051 0.75324675 0.74371859 0.74371859]
|
|
|
|
mean value: 0.7485773773680334
|
|
|
|
key: test_precision
|
|
value: [0.65217391 0.63157895 0.78947368 0.68 0.61538462 0.73913043
|
|
0.72 0.65 0.69230769 0.71428571]
|
|
|
|
mean value: 0.6884335001383056
|
|
|
|
key: train_precision
|
|
value: [0.73205742 0.75510204 0.71090047 0.735 0.73631841 0.73631841
|
|
0.72277228 0.75520833 0.72195122 0.72195122]
|
|
|
|
mean value: 0.7327579796523762
|
|
|
|
key: test_recall
|
|
value: [0.71428571 0.57142857 0.71428571 0.80952381 0.76190476 0.77272727
|
|
0.81818182 0.59090909 0.81818182 0.68181818]
|
|
|
|
mean value: 0.7253246753246754
|
|
|
|
key: train_recall
|
|
value: [0.78865979 0.7628866 0.77319588 0.75773196 0.7628866 0.76683938
|
|
0.75647668 0.75129534 0.76683938 0.76683938]
|
|
|
|
mean value: 0.7653650980182682
|
|
|
|
key: test_roc_auc
|
|
value: [0.67532468 0.62662338 0.76623377 0.72294372 0.65367965 0.74350649
|
|
0.74242424 0.62878788 0.71861472 0.69805195]
|
|
|
|
mean value: 0.6976190476190477
|
|
|
|
key: train_roc_auc
|
|
value: [0.74925218 0.75709097 0.72856685 0.74156028 0.7441376 0.74682175
|
|
0.73390845 0.75451365 0.73651247 0.73651247]
|
|
|
|
mean value: 0.7428876662571444
|
|
|
|
key: test_jcc
|
|
value: [0.51724138 0.42857143 0.6 0.5862069 0.51612903 0.60714286
|
|
0.62068966 0.44827586 0.6 0.53571429]
|
|
|
|
mean value: 0.5459971396790084
|
|
|
|
key: train_jcc
|
|
value: [0.612 0.61157025 0.58823529 0.5951417 0.59919028 0.60162602
|
|
0.58634538 0.60416667 0.592 0.592 ]
|
|
|
|
mean value: 0.5982275590310133
|
|
|
|
MCC on Blind test: 0.48
|
|
|
|
Accuracy on Blind test: 0.73
|
|
|
|
Model_name: XGBoost
|
|
Model func: XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'Z...
|
|
interaction_constraints=None, learning_rate=None,
|
|
max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan,
|
|
monotone_constraints=None, n_estimators=100,
|
|
n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None,
|
|
reg_lambda=None, scale_pos_weight=None,
|
|
subsample=None, tree_method=None,
|
|
use_label_encoder=False,
|
|
validate_parameters=None, verbosity=0))])
|
|
|
|
key: fit_time
|
|
value: [1.2328546 0.66477251 1.61751771 0.90823627 0.69649839 0.87800646
|
|
0.88173628 0.4864018 0.63253689 1.07032633]
|
|
|
|
mean value: 0.9068887233734131
|
|
|
|
key: score_time
|
|
value: [0.01455235 0.01226497 0.01529741 0.01239967 0.01296425 0.01283431
|
|
0.01390839 0.01246428 0.01260304 0.01239204]
|
|
|
|
mean value: 0.013168072700500489
|
|
|
|
key: test_mcc
|
|
value: [0.72077922 0.53796222 0.81778934 0.723327 0.72451364 0.81778934
|
|
0.76839827 0.67532468 0.72077922 0.73471273]
|
|
|
|
mean value: 0.7241375648717405
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.86046512 0.76744186 0.90697674 0.86046512 0.86046512 0.90697674
|
|
0.88372093 0.8372093 0.86046512 0.86046512]
|
|
|
|
mean value: 0.8604651162790697
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.85714286 0.77272727 0.90909091 0.85 0.86363636 0.9047619
|
|
0.88372093 0.8372093 0.86363636 0.85 ]
|
|
|
|
mean value: 0.8591925903553811
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.85714286 0.73913043 0.86956522 0.89473684 0.82608696 0.95
|
|
0.9047619 0.85714286 0.86363636 0.94444444]
|
|
|
|
mean value: 0.8706647877929342
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.85714286 0.80952381 0.95238095 0.80952381 0.9047619 0.86363636
|
|
0.86363636 0.81818182 0.86363636 0.77272727]
|
|
|
|
mean value: 0.8515151515151516
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.86038961 0.76839827 0.90800866 0.85930736 0.86147186 0.90800866
|
|
0.88419913 0.83766234 0.86038961 0.86255411]
|
|
|
|
mean value: 0.861038961038961
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.75 0.62962963 0.83333333 0.73913043 0.76 0.82608696
|
|
0.79166667 0.72 0.76 0.73913043]
|
|
|
|
mean value: 0.7548977455716586
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.64
|
|
|
|
Accuracy on Blind test: 0.82
|
|
|
|
Model_name: LDA
|
|
Model func: LinearDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LinearDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.11340785 0.06507301 0.08535337 0.06547856 0.06026602 0.16849113
|
|
0.12170339 0.08560491 0.13529682 0.08245301]
|
|
|
|
mean value: 0.09831280708312988
|
|
|
|
key: score_time
|
|
value: [0.03657317 0.02178717 0.01445198 0.0345099 0.01233554 0.01242328
|
|
0.02445245 0.01907539 0.01216125 0.02539206]
|
|
|
|
mean value: 0.021316218376159667
|
|
|
|
key: test_mcc
|
|
value: [0.53796222 0.35141081 0.39479486 0.53463203 0.58225108 0.58824786
|
|
0.58225108 0.54609991 0.30265778 0.31423621]
|
|
|
|
mean value: 0.4734543830252807
|
|
|
|
key: train_mcc
|
|
value: [0.78836141 0.85069894 0.79855231 0.78838964 0.81913359 0.79393717
|
|
0.79853618 0.7934388 0.80366443 0.80365395]
|
|
|
|
mean value: 0.8038366418937956
|
|
|
|
key: test_accuracy
|
|
value: [0.76744186 0.6744186 0.69767442 0.76744186 0.79069767 0.79069767
|
|
0.79069767 0.76744186 0.65116279 0.65116279]
|
|
|
|
mean value: 0.7348837209302326
|
|
|
|
key: train_accuracy
|
|
value: [0.89405685 0.9250646 0.89922481 0.89405685 0.90956072 0.89664083
|
|
0.89922481 0.89664083 0.90180879 0.90180879]
|
|
|
|
mean value: 0.9018087855297158
|
|
|
|
key: test_fscore
|
|
value: [0.77272727 0.68181818 0.68292683 0.76190476 0.79069767 0.7804878
|
|
0.79069767 0.75 0.68085106 0.61538462]
|
|
|
|
mean value: 0.7307495878648169
|
|
|
|
key: train_fscore
|
|
value: [0.8956743 0.92388451 0.8987013 0.89295039 0.90956072 0.89417989
|
|
0.89817232 0.89528796 0.90206186 0.90104167]
|
|
|
|
mean value: 0.9011514926942206
|
|
|
|
key: test_precision
|
|
value: [0.73913043 0.65217391 0.7 0.76190476 0.77272727 0.84210526
|
|
0.80952381 0.83333333 0.64 0.70588235]
|
|
|
|
mean value: 0.7456781141414336
|
|
|
|
key: train_precision
|
|
value: [0.88442211 0.94117647 0.90575916 0.9047619 0.9119171 0.91351351
|
|
0.90526316 0.9047619 0.8974359 0.90575916]
|
|
|
|
mean value: 0.9074770382561882
|
|
|
|
key: test_recall
|
|
value: [0.80952381 0.71428571 0.66666667 0.76190476 0.80952381 0.72727273
|
|
0.77272727 0.68181818 0.72727273 0.54545455]
|
|
|
|
mean value: 0.7216450216450216
|
|
|
|
key: train_recall
|
|
value: [0.90721649 0.90721649 0.89175258 0.8814433 0.90721649 0.87564767
|
|
0.89119171 0.88601036 0.90673575 0.89637306]
|
|
|
|
mean value: 0.895080391004754
|
|
|
|
key: test_roc_auc
|
|
value: [0.76839827 0.67532468 0.6969697 0.76731602 0.79112554 0.79220779
|
|
0.79112554 0.76948052 0.64935065 0.65367965]
|
|
|
|
mean value: 0.7354978354978355
|
|
|
|
key: train_roc_auc
|
|
value: [0.89402276 0.92511084 0.89924416 0.89408953 0.9095668 0.89658672
|
|
0.8992041 0.89661343 0.90182148 0.90179478]
|
|
|
|
mean value: 0.9018054591100902
|
|
|
|
key: test_jcc
|
|
value: [0.62962963 0.51724138 0.51851852 0.61538462 0.65384615 0.64
|
|
0.65384615 0.6 0.51612903 0.44444444]
|
|
|
|
mean value: 0.5789039927237924
|
|
|
|
key: train_jcc
|
|
value: [0.81105991 0.85853659 0.81603774 0.80660377 0.83412322 0.80861244
|
|
0.81516588 0.81042654 0.82159624 0.81990521]
|
|
|
|
mean value: 0.8202067540037329
|
|
|
|
MCC on Blind test: 0.4
|
|
|
|
Accuracy on Blind test: 0.7
|
|
|
|
Model_name: Multinomial
|
|
Model func: MultinomialNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MultinomialNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01193261 0.01175404 0.01164556 0.011657 0.0116663 0.011657
|
|
0.01157117 0.0117991 0.01158476 0.01160455]
|
|
|
|
mean value: 0.011687207221984863
|
|
|
|
key: score_time
|
|
value: [0.02242517 0.01041889 0.01026773 0.01022983 0.01037145 0.01021743
|
|
0.01020646 0.01034784 0.01019359 0.01028991]
|
|
|
|
mean value: 0.011496829986572265
|
|
|
|
key: test_mcc
|
|
value: [0.3030303 0.34848485 0.64040632 0.49456394 0.40939224 0.39479486
|
|
0.49916256 0.25541126 0.4912706 0.30265778]
|
|
|
|
mean value: 0.4139174695448401
|
|
|
|
key: train_mcc
|
|
value: [0.43912593 0.43417069 0.41788904 0.42787777 0.42376414 0.43014703
|
|
0.40825746 0.45098666 0.43399577 0.46079208]
|
|
|
|
mean value: 0.4327006564068274
|
|
|
|
key: test_accuracy
|
|
value: [0.65116279 0.6744186 0.81395349 0.74418605 0.69767442 0.69767442
|
|
0.74418605 0.62790698 0.74418605 0.65116279]
|
|
|
|
mean value: 0.7046511627906977
|
|
|
|
key: train_accuracy
|
|
value: [0.71834625 0.71576227 0.70801034 0.71317829 0.71059432 0.71317829
|
|
0.70284238 0.72351421 0.71576227 0.72868217]
|
|
|
|
mean value: 0.7149870801033592
|
|
|
|
key: test_fscore
|
|
value: [0.65116279 0.66666667 0.82608696 0.75555556 0.72340426 0.71111111
|
|
0.7755102 0.63636364 0.76595745 0.68085106]
|
|
|
|
mean value: 0.7192669686955463
|
|
|
|
key: train_fscore
|
|
value: [0.73349633 0.73170732 0.72235872 0.72592593 0.72682927 0.72992701
|
|
0.71744472 0.73965937 0.72906404 0.74327628]
|
|
|
|
mean value: 0.7299688981336869
|
|
|
|
key: test_precision
|
|
value: [0.63636364 0.66666667 0.76 0.70833333 0.65384615 0.69565217
|
|
0.7037037 0.63636364 0.72 0.64 ]
|
|
|
|
mean value: 0.6820929304190174
|
|
|
|
key: train_precision
|
|
value: [0.69767442 0.69444444 0.69014085 0.69668246 0.68981481 0.68807339
|
|
0.68224299 0.69724771 0.69483568 0.7037037 ]
|
|
|
|
mean value: 0.6934860463415824
|
|
|
|
key: test_recall
|
|
value: [0.66666667 0.66666667 0.9047619 0.80952381 0.80952381 0.72727273
|
|
0.86363636 0.63636364 0.81818182 0.72727273]
|
|
|
|
mean value: 0.762987012987013
|
|
|
|
key: train_recall
|
|
value: [0.77319588 0.77319588 0.75773196 0.75773196 0.76804124 0.77720207
|
|
0.75647668 0.78756477 0.76683938 0.78756477]
|
|
|
|
mean value: 0.7705544575610277
|
|
|
|
key: test_roc_auc
|
|
value: [0.65151515 0.67424242 0.81601732 0.745671 0.70021645 0.6969697
|
|
0.74134199 0.62770563 0.74242424 0.64935065]
|
|
|
|
mean value: 0.7045454545454546
|
|
|
|
key: train_roc_auc
|
|
value: [0.71820416 0.71561348 0.70788152 0.71306287 0.71044549 0.7133433
|
|
0.70298061 0.72367929 0.71589392 0.72883393]
|
|
|
|
mean value: 0.7149938571657497
|
|
|
|
key: test_jcc
|
|
value: [0.48275862 0.5 0.7037037 0.60714286 0.56666667 0.55172414
|
|
0.63333333 0.46666667 0.62068966 0.51612903]
|
|
|
|
mean value: 0.5648814673564395
|
|
|
|
key: train_jcc
|
|
value: [0.57915058 0.57692308 0.56538462 0.56976744 0.57088123 0.57471264
|
|
0.55938697 0.58687259 0.57364341 0.59143969]
|
|
|
|
mean value: 0.5748162242671867
|
|
|
|
MCC on Blind test: 0.32
|
|
|
|
Accuracy on Blind test: 0.67
|
|
|
|
Model_name: Passive Aggresive
|
|
Model func: PassiveAggressiveClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
PassiveAggressiveClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01609015 0.01382875 0.01554108 0.01616502 0.02625966 0.01413321
|
|
0.01446033 0.01906276 0.01966596 0.0247612 ]
|
|
|
|
mean value: 0.017996811866760255
|
|
|
|
key: score_time
|
|
value: [0.01026535 0.00975919 0.01020408 0.01017809 0.01034117 0.01031709
|
|
0.01144147 0.01193285 0.01191831 0.01199841]
|
|
|
|
mean value: 0.010835599899291993
|
|
|
|
key: test_mcc
|
|
value: [0.61187382 0.40939224 0.51258863 0.39479486 0.21578506 0.38684081
|
|
0.59541363 0.41223987 0.26856633 0.31423621]
|
|
|
|
mean value: 0.4121731451940424
|
|
|
|
key: train_mcc
|
|
value: [0.64394423 0.69530024 0.60234402 0.64916894 0.46289376 0.54412654
|
|
0.61858746 0.48824211 0.6822421 0.68799886]
|
|
|
|
mean value: 0.6074848250415069
|
|
|
|
key: test_accuracy
|
|
value: [0.79069767 0.69767442 0.74418605 0.69767442 0.53488372 0.6744186
|
|
0.79069767 0.6744186 0.62790698 0.65116279]
|
|
|
|
mean value: 0.6883720930232557
|
|
|
|
key: train_accuracy
|
|
value: [0.81136951 0.84754522 0.77260982 0.82428941 0.67700258 0.74418605
|
|
0.80620155 0.69509044 0.8372093 0.84237726]
|
|
|
|
mean value: 0.7857881136950905
|
|
|
|
key: test_fscore
|
|
value: [0.74285714 0.72340426 0.68571429 0.68292683 0.67741935 0.74074074
|
|
0.81632653 0.75 0.69230769 0.61538462]
|
|
|
|
mean value: 0.7127081447042873
|
|
|
|
key: train_fscore
|
|
value: [0.78466077 0.84987277 0.7124183 0.82105263 0.75633528 0.78980892
|
|
0.81840194 0.76494024 0.84819277 0.83378747]
|
|
|
|
mean value: 0.7979471085693836
|
|
|
|
key: test_precision
|
|
value: [0.92857143 0.65384615 0.85714286 0.7 0.51219512 0.625
|
|
0.74074074 0.61764706 0.6 0.70588235]
|
|
|
|
mean value: 0.6941025714017106
|
|
|
|
key: train_precision
|
|
value: [0.91724138 0.83919598 0.97321429 0.83870968 0.60815047 0.66906475
|
|
0.76818182 0.62135922 0.79279279 0.87931034]
|
|
|
|
mean value: 0.7907220719867525
|
|
|
|
key: test_recall
|
|
value: [0.61904762 0.80952381 0.57142857 0.66666667 1. 0.90909091
|
|
0.90909091 0.95454545 0.81818182 0.54545455]
|
|
|
|
mean value: 0.7803030303030303
|
|
|
|
key: train_recall
|
|
value: [0.68556701 0.86082474 0.56185567 0.80412371 1. 0.96373057
|
|
0.87564767 0.99481865 0.9119171 0.79274611]
|
|
|
|
mean value: 0.8451231237647562
|
|
|
|
key: test_roc_auc
|
|
value: [0.78679654 0.70021645 0.74025974 0.6969697 0.54545455 0.66883117
|
|
0.78787879 0.66774892 0.62337662 0.65367965]
|
|
|
|
mean value: 0.6871212121212121
|
|
|
|
key: train_roc_auc
|
|
value: [0.81169542 0.84751082 0.77315581 0.82434165 0.6761658 0.74475188
|
|
0.80638054 0.69586293 0.83740185 0.84224935]
|
|
|
|
mean value: 0.7859516051492976
|
|
|
|
key: test_jcc
|
|
value: [0.59090909 0.56666667 0.52173913 0.51851852 0.51219512 0.58823529
|
|
0.68965517 0.6 0.52941176 0.44444444]
|
|
|
|
mean value: 0.5561775204162045
|
|
|
|
key: train_jcc
|
|
value: [0.64563107 0.73893805 0.55329949 0.69642857 0.60815047 0.65263158
|
|
0.69262295 0.61935484 0.73640167 0.71495327]
|
|
|
|
mean value: 0.6658411968237227
|
|
|
|
MCC on Blind test: 0.52
|
|
|
|
Accuracy on Blind test: 0.76
|
|
|
|
Model_name: Stochastic GDescent
|
|
Model func: SGDClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SGDClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.03140759 0.0151937 0.01848125 0.01726508 0.01619983 0.02581859
|
|
0.0273428 0.02270555 0.01659775 0.02209139]
|
|
|
|
mean value: 0.02131035327911377
|
|
|
|
key: score_time
|
|
value: [0.01060104 0.01054025 0.01031375 0.01028848 0.01018238 0.01028967
|
|
0.01034975 0.01032352 0.01027846 0.01031113]
|
|
|
|
mean value: 0.010347843170166016
|
|
|
|
key: test_mcc
|
|
value: [0.59970431 0.4633482 0.75210143 0.57282196 0.4517935 0.61187382
|
|
0.67462198 0.36986766 0.20835137 0.40041988]
|
|
|
|
mean value: 0.5104904109207444
|
|
|
|
key: train_mcc
|
|
value: [0.65519777 0.73263194 0.72908637 0.5719457 0.69442892 0.63950439
|
|
0.69510176 0.57728429 0.74307158 0.73224955]
|
|
|
|
mean value: 0.6770502267008328
|
|
|
|
key: test_accuracy
|
|
value: [0.79069767 0.72093023 0.86046512 0.74418605 0.72093023 0.79069767
|
|
0.8372093 0.6744186 0.60465116 0.6744186 ]
|
|
|
|
mean value: 0.741860465116279
|
|
|
|
key: train_accuracy
|
|
value: [0.80620155 0.86563307 0.85788114 0.74935401 0.83979328 0.80103359
|
|
0.84754522 0.75452196 0.87080103 0.85788114]
|
|
|
|
mean value: 0.8250645994832041
|
|
|
|
key: test_fscore
|
|
value: [0.80851064 0.75 0.875 0.79245283 0.73913043 0.82352941
|
|
0.84444444 0.73076923 0.63829787 0.58823529]
|
|
|
|
mean value: 0.7590370156705615
|
|
|
|
key: train_fscore
|
|
value: [0.83588621 0.87 0.87058824 0.79917184 0.85514019 0.82926829
|
|
0.84754522 0.80083857 0.87437186 0.84057971]
|
|
|
|
mean value: 0.8423390135488182
|
|
|
|
key: test_precision
|
|
value: [0.73076923 0.66666667 0.77777778 0.65625 0.68 0.72413793
|
|
0.82608696 0.63333333 0.6 0.83333333]
|
|
|
|
mean value: 0.7128355229436564
|
|
|
|
key: train_precision
|
|
value: [0.72623574 0.84466019 0.8008658 0.66782007 0.78205128 0.7248062
|
|
0.84536082 0.67253521 0.84878049 0.95394737]
|
|
|
|
mean value: 0.7867063181527051
|
|
|
|
key: test_recall
|
|
value: [0.9047619 0.85714286 1. 1. 0.80952381 0.95454545
|
|
0.86363636 0.86363636 0.68181818 0.45454545]
|
|
|
|
mean value: 0.8389610389610389
|
|
|
|
key: train_recall
|
|
value: [0.98453608 0.89690722 0.95360825 0.99484536 0.94329897 0.96891192
|
|
0.84974093 0.98963731 0.9015544 0.75129534]
|
|
|
|
mean value: 0.9234335772661717
|
|
|
|
key: test_roc_auc
|
|
value: [0.79329004 0.72402597 0.86363636 0.75 0.72294372 0.78679654
|
|
0.83658009 0.66991342 0.60281385 0.67965368]
|
|
|
|
mean value: 0.7429653679653679
|
|
|
|
key: train_roc_auc
|
|
value: [0.80573954 0.86555205 0.85763314 0.74871802 0.83952513 0.80146627
|
|
0.84755088 0.75512793 0.87088029 0.85760643]
|
|
|
|
mean value: 0.8249799690187489
|
|
|
|
key: test_jcc
|
|
value: [0.67857143 0.6 0.77777778 0.65625 0.5862069 0.7
|
|
0.73076923 0.57575758 0.46875 0.41666667]
|
|
|
|
mean value: 0.6190749576094403
|
|
|
|
key: train_jcc
|
|
value: [0.71804511 0.7699115 0.77083333 0.66551724 0.74693878 0.70833333
|
|
0.73542601 0.66783217 0.77678571 0.725 ]
|
|
|
|
mean value: 0.7284623191849406
|
|
|
|
MCC on Blind test: 0.53
|
|
|
|
Accuracy on Blind test: 0.76
|
|
|
|
Model_name: AdaBoost Classifier
|
|
Model func: AdaBoostClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', AdaBoostClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.18568873 0.18229556 0.18312979 0.18038273 0.17975068 0.47091389
|
|
0.39590287 0.30203557 0.16115618 0.18954515]
|
|
|
|
mean value: 0.24308011531829835
|
|
|
|
key: score_time
|
|
value: [0.0176034 0.01771164 0.0178442 0.01760936 0.01758552 0.04107261
|
|
0.0410192 0.01772761 0.01576948 0.02026224]
|
|
|
|
mean value: 0.022420525550842285
|
|
|
|
key: test_mcc
|
|
value: [0.62964308 0.64040632 0.67462198 0.44468651 0.53595916 0.72451364
|
|
0.67532468 0.58225108 0.62770563 0.54609991]
|
|
|
|
mean value: 0.6081211972694571
|
|
|
|
key: train_mcc
|
|
value: [0.9225879 0.93803584 0.94877223 0.94836935 0.91735891 0.92785021
|
|
0.92249346 0.93282944 0.93313211 0.94316543]
|
|
|
|
mean value: 0.933459487609339
|
|
|
|
key: test_accuracy
|
|
value: [0.81395349 0.81395349 0.8372093 0.72093023 0.76744186 0.86046512
|
|
0.8372093 0.79069767 0.81395349 0.76744186]
|
|
|
|
mean value: 0.8023255813953488
|
|
|
|
key: train_accuracy
|
|
value: [0.96124031 0.96899225 0.97416021 0.97416021 0.95865633 0.96382429
|
|
0.96124031 0.96640827 0.96640827 0.97157623]
|
|
|
|
mean value: 0.9666666666666667
|
|
|
|
key: test_fscore
|
|
value: [0.8 0.82608696 0.82926829 0.72727273 0.75 0.85714286
|
|
0.8372093 0.79069767 0.81818182 0.75 ]
|
|
|
|
mean value: 0.7985859628546255
|
|
|
|
key: train_fscore
|
|
value: [0.96163683 0.96891192 0.97461929 0.97435897 0.95897436 0.96410256
|
|
0.96124031 0.96640827 0.96675192 0.97157623]
|
|
|
|
mean value: 0.9668580656879063
|
|
|
|
key: test_precision
|
|
value: [0.84210526 0.76 0.85 0.69565217 0.78947368 0.9
|
|
0.85714286 0.80952381 0.81818182 0.83333333]
|
|
|
|
mean value: 0.8155412939463282
|
|
|
|
key: train_precision
|
|
value: [0.95431472 0.97395833 0.96 0.96938776 0.95408163 0.95431472
|
|
0.95876289 0.96391753 0.95454545 0.96907216]
|
|
|
|
mean value: 0.9612355194577843
|
|
|
|
key: test_recall
|
|
value: [0.76190476 0.9047619 0.80952381 0.76190476 0.71428571 0.81818182
|
|
0.81818182 0.77272727 0.81818182 0.68181818]
|
|
|
|
mean value: 0.7861471861471861
|
|
|
|
key: train_recall
|
|
value: [0.96907216 0.96391753 0.98969072 0.97938144 0.96391753 0.97409326
|
|
0.96373057 0.96891192 0.97927461 0.97409326]
|
|
|
|
mean value: 0.9726083008386304
|
|
|
|
key: test_roc_auc
|
|
value: [0.81277056 0.81601732 0.83658009 0.72186147 0.76623377 0.86147186
|
|
0.83766234 0.79112554 0.81385281 0.76948052]
|
|
|
|
mean value: 0.8027056277056277
|
|
|
|
key: train_roc_auc
|
|
value: [0.96122002 0.9690054 0.97411997 0.97414668 0.9586427 0.96385076
|
|
0.96124673 0.96641472 0.96644143 0.97158271]
|
|
|
|
mean value: 0.9666671117995833
|
|
|
|
key: test_jcc
|
|
value: [0.66666667 0.7037037 0.70833333 0.57142857 0.6 0.75
|
|
0.72 0.65384615 0.69230769 0.6 ]
|
|
|
|
mean value: 0.6666286121286121
|
|
|
|
key: train_jcc
|
|
value: [0.92610837 0.93969849 0.95049505 0.95 0.92118227 0.93069307
|
|
0.92537313 0.935 0.93564356 0.94472362]
|
|
|
|
mean value: 0.9358917568443528
|
|
|
|
MCC on Blind test: 0.45
|
|
|
|
Accuracy on Blind test: 0.73
|
|
|
|
Model_name: Bagging Classifier
|
|
Model func: BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
BaggingClassifier(n_jobs=10, oob_score=True,
|
|
random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.14063168 0.10446286 0.10352421 0.12305665 0.09840965 0.05504537
|
|
0.05416179 0.06609821 0.05265355 0.06720805]
|
|
|
|
mean value: 0.08652520179748535
|
|
|
|
key: score_time
|
|
value: [0.02307844 0.02167487 0.02694726 0.02493787 0.02693772 0.02003384
|
|
0.01963663 0.02375698 0.01780176 0.02739167]
|
|
|
|
mean value: 0.023219704627990723
|
|
|
|
key: test_mcc
|
|
value: [0.63732414 0.53463203 0.723327 0.62964308 0.62770563 0.82901914
|
|
0.58824786 0.58824786 0.65585036 0.61748053]
|
|
|
|
mean value: 0.6431477616870775
|
|
|
|
key: train_mcc
|
|
value: [0.96904298 0.96904463 0.98450937 0.94316543 0.94832007 0.96393847
|
|
0.96899204 0.98450896 0.9741727 0.97427611]
|
|
|
|
mean value: 0.9679970760915598
|
|
|
|
key: test_accuracy
|
|
value: [0.81395349 0.76744186 0.86046512 0.81395349 0.81395349 0.90697674
|
|
0.79069767 0.79069767 0.81395349 0.79069767]
|
|
|
|
mean value: 0.8162790697674418
|
|
|
|
key: train_accuracy
|
|
value: [0.98449612 0.98449612 0.99224806 0.97157623 0.97416021 0.98191214
|
|
0.98449612 0.99224806 0.9870801 0.9870801 ]
|
|
|
|
mean value: 0.9839793281653747
|
|
|
|
key: test_fscore
|
|
value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
[0.78947368 0.76190476 0.85 0.8 0.80952381 0.9
|
|
0.7804878 0.7804878 0.78947368 0.75675676]
|
|
|
|
mean value: 0.8018108306362478
|
|
|
|
key: train_fscore
|
|
value: [0.98461538 0.98445596 0.99224806 0.97157623 0.9742268 0.98172324
|
|
0.98445596 0.99220779 0.98701299 0.98694517]
|
|
|
|
mean value: 0.983946758177471
|
|
|
|
key: test_precision
|
|
value: [0.88235294 0.76190476 0.89473684 0.84210526 0.80952381 1.
|
|
0.84210526 0.84210526 0.9375 0.93333333]
|
|
|
|
mean value: 0.8745667477517323
|
|
|
|
key: train_precision
|
|
value: [0.97959184 0.98958333 0.99481865 0.97409326 0.9742268 0.98947368
|
|
0.98445596 0.99479167 0.98958333 0.99473684]
|
|
|
|
mean value: 0.9865355376155196
|
|
|
|
key: test_recall
|
|
value: [0.71428571 0.76190476 0.80952381 0.76190476 0.80952381 0.81818182
|
|
0.72727273 0.72727273 0.68181818 0.63636364]
|
|
|
|
mean value: 0.7448051948051948
|
|
|
|
key: train_recall
|
|
value: [0.98969072 0.97938144 0.98969072 0.96907216 0.9742268 0.97409326
|
|
0.98445596 0.98963731 0.98445596 0.97927461]
|
|
|
|
mean value: 0.9813978954115699
|
|
|
|
key: test_roc_auc
|
|
value: [0.81168831 0.76731602 0.85930736 0.81277056 0.81385281 0.90909091
|
|
0.79220779 0.79220779 0.81709957 0.79437229]
|
|
|
|
mean value: 0.8169913419913419
|
|
|
|
key: train_roc_auc
|
|
value: [0.98448267 0.98450937 0.99225469 0.97158271 0.97416003 0.98189199
|
|
0.98449602 0.99224133 0.98707334 0.98705999]
|
|
|
|
mean value: 0.9839752149991988
|
|
|
|
key: test_jcc
|
|
value: [0.65217391 0.61538462 0.73913043 0.66666667 0.68 0.81818182
|
|
0.64 0.64 0.65217391 0.60869565]
|
|
|
|
mean value: 0.6712407013276579
|
|
|
|
key: train_jcc
|
|
value: [0.96969697 0.96938776 0.98461538 0.94472362 0.94974874 0.96410256
|
|
0.96938776 0.98453608 0.97435897 0.9742268 ]
|
|
|
|
mean value: 0.9684784651384958
|
|
|
|
MCC on Blind test: 0.51
|
|
|
|
Accuracy on Blind test: 0.76
|
|
|
|
Model_name: Gaussian Process
|
|
Model func: GaussianProcessClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianProcessClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.19252563 0.15471125 0.11138511 0.15535378 0.13108945 0.1718359
|
|
0.15187907 0.18106008 0.18106508 0.21381736]
|
|
|
|
mean value: 0.16447227001190184
|
|
|
|
key: score_time
|
|
value: [0.02305579 0.02334285 0.01446009 0.02341747 0.01428199 0.02465487
|
|
0.02461863 0.02343106 0.02325463 0.04997849]
|
|
|
|
mean value: 0.024449586868286133
|
|
|
|
key: test_mcc
|
|
value: [0.44227524 0.34848485 0.62770563 0.54609991 0.34848485 0.3030303
|
|
0.58134627 0.35748709 0.2567 0.55959928]
|
|
|
|
mean value: 0.4371213413695219
|
|
|
|
key: train_mcc
|
|
value: [0.97427816 0.97427816 0.97938089 0.97427816 0.97417339 0.9741727
|
|
0.97427611 0.96414361 0.97937979 0.97937979]
|
|
|
|
mean value: 0.9747740775759611
|
|
|
|
key: test_accuracy
|
|
value: [0.72093023 0.6744186 0.81395349 0.76744186 0.6744186 0.65116279
|
|
0.79069767 0.6744186 0.62790698 0.76744186]
|
|
|
|
mean value: 0.7162790697674418
|
|
|
|
key: train_accuracy
|
|
value: [0.9870801 0.9870801 0.98966408 0.9870801 0.9870801 0.9870801
|
|
0.9870801 0.98191214 0.98966408 0.98966408]
|
|
|
|
mean value: 0.9873385012919897
|
|
|
|
key: test_fscore
|
|
value: [0.7 0.66666667 0.80952381 0.7826087 0.66666667 0.65116279
|
|
0.8 0.65 0.66666667 0.73684211]
|
|
|
|
mean value: 0.7130137401136816
|
|
|
|
key: train_fscore
|
|
value: [0.98701299 0.98701299 0.98963731 0.98701299 0.9870801 0.98701299
|
|
0.98694517 0.9816273 0.98958333 0.98958333]
|
|
|
|
mean value: 0.987250849007799
|
|
|
|
key: test_precision
|
|
value: [0.73684211 0.66666667 0.80952381 0.72 0.66666667 0.66666667
|
|
0.7826087 0.72222222 0.61538462 0.875 ]
|
|
|
|
mean value: 0.7261581448045978
|
|
|
|
key: train_precision
|
|
value: [0.9947644 0.9947644 0.99479167 0.9947644 0.98963731 0.98958333
|
|
0.99473684 0.99468085 0.9947644 0.9947644 ]
|
|
|
|
mean value: 0.9937251988397371
|
|
|
|
key: test_recall
|
|
value: [0.66666667 0.66666667 0.80952381 0.85714286 0.66666667 0.63636364
|
|
0.81818182 0.59090909 0.72727273 0.63636364]
|
|
|
|
mean value: 0.7075757575757575
|
|
|
|
key: train_recall
|
|
value: [0.97938144 0.97938144 0.98453608 0.97938144 0.98453608 0.98445596
|
|
0.97927461 0.96891192 0.98445596 0.98445596]
|
|
|
|
mean value: 0.9808770898990439
|
|
|
|
key: test_roc_auc
|
|
value: [0.71969697 0.67424242 0.81385281 0.76948052 0.67424242 0.65151515
|
|
0.79004329 0.67640693 0.62554113 0.77056277]
|
|
|
|
mean value: 0.7165584415584415
|
|
|
|
key: train_roc_auc
|
|
value: [0.98710005 0.98710005 0.98967737 0.98710005 0.98708669 0.98707334
|
|
0.98705999 0.98187864 0.98965066 0.98965066]
|
|
|
|
mean value: 0.9873377490518669
|
|
|
|
key: test_jcc
|
|
value: [0.53846154 0.5 0.68 0.64285714 0.5 0.48275862
|
|
0.66666667 0.48148148 0.5 0.58333333]
|
|
|
|
mean value: 0.5575558783489818
|
|
|
|
key: train_jcc
|
|
value: [0.97435897 0.97435897 0.97948718 0.97435897 0.9744898 0.97435897
|
|
0.9742268 0.96391753 0.97938144 0.97938144]
|
|
|
|
mean value: 0.974832008933629
|
|
|
|
MCC on Blind test: 0.27
|
|
|
|
Accuracy on Blind test: 0.64
|
|
|
|
Model_name: Gradient Boosting
|
|
Model func: GradientBoostingClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GradientBoostingClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.62265253 0.64571428 0.66495323 0.91781998 1.16082621 1.02426267
|
|
0.69185972 0.61647153 0.62877178 0.62057972]
|
|
|
|
mean value: 0.7593911647796631
|
|
|
|
key: score_time
|
|
value: [0.00941133 0.00966191 0.01070642 0.01257944 0.0127492 0.03311658
|
|
0.00940585 0.01045775 0.01020002 0.00968361]
|
|
|
|
mean value: 0.012797212600708008
|
|
|
|
key: test_mcc
|
|
value: [0.53463203 0.53796222 0.86147186 0.67988342 0.7756157 0.81778934
|
|
0.7756157 0.723327 0.73471273 0.69486034]
|
|
|
|
mean value: 0.7135870332283394
|
|
|
|
key: train_mcc
|
|
value: [0.99484522 1. 1. 1. 1. 0.99484536
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9989690584330672
|
|
|
|
key: test_accuracy
|
|
value: [0.76744186 0.76744186 0.93023256 0.8372093 0.88372093 0.90697674
|
|
0.88372093 0.86046512 0.86046512 0.8372093 ]
|
|
|
|
mean value: 0.8534883720930233
|
|
|
|
key: train_accuracy
|
|
value: [0.99741602 1. 1. 1. 1. 0.99741602
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.999483204134367
|
|
|
|
key: test_fscore
|
|
value: [0.76190476 0.77272727 0.93023256 0.82051282 0.88888889 0.9047619
|
|
0.87804878 0.86956522 0.85 0.82051282]
|
|
|
|
mean value: 0.8497155025327113
|
|
|
|
key: train_fscore
|
|
value: [0.99742931 1. 1. 1. 1. 0.99741602
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9994845326584431
|
|
|
|
key: test_precision
|
|
value: [0.76190476 0.73913043 0.90909091 0.88888889 0.83333333 0.95
|
|
0.94736842 0.83333333 0.94444444 0.94117647]
|
|
|
|
mean value: 0.8748670997419147
|
|
|
|
key: train_precision
|
|
value: [0.99487179 1. 1. 1. 1. 0.99484536
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9989717155696537
|
|
|
|
key: test_recall
|
|
value: [0.76190476 0.80952381 0.95238095 0.76190476 0.95238095 0.86363636
|
|
0.81818182 0.90909091 0.77272727 0.72727273]
|
|
|
|
mean value: 0.8329004329004329
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.76731602 0.76839827 0.93073593 0.83549784 0.88528139 0.90800866
|
|
0.88528139 0.85930736 0.86255411 0.83982684]
|
|
|
|
mean value: 0.8542207792207792
|
|
|
|
key: train_roc_auc
|
|
value: [0.99740933 1. 1. 1. 1. 0.99742268
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9994832006837242
|
|
|
|
key: test_jcc
|
|
value: [0.61538462 0.62962963 0.86956522 0.69565217 0.8 0.82608696
|
|
0.7826087 0.76923077 0.73913043 0.69565217]
|
|
|
|
mean value: 0.7422940666418927
|
|
|
|
key: train_jcc
|
|
value: [0.99487179 1. 1. 1. 1. 0.99484536
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9989717155696537
|
|
|
|
MCC on Blind test: 0.52
|
|
|
|
Accuracy on Blind test: 0.76
|
|
|
|
Model_name: QDA
|
|
Model func: QuadraticDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', QuadraticDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.03295922 0.05415583 0.05406284 0.05276442 0.06451917 0.03202033
|
|
0.03230309 0.0328548 0.03241825 0.03225565]
|
|
|
|
mean value: 0.042031359672546384
|
|
|
|
key: score_time
|
|
value: [0.01492977 0.02648234 0.02877092 0.03977489 0.01995325 0.01646137
|
|
0.02642083 0.01375341 0.01634121 0.01620936]
|
|
|
|
mean value: 0.0219097375869751
|
|
|
|
key: test_mcc
|
|
value: [0.28169285 0.44701207 0.2270149 0.53463203 0.30265778 0.38684081
|
|
0.26106714 0.53595916 0.17358241 0.20824344]
|
|
|
|
mean value: 0.33587025909950874
|
|
|
|
key: train_mcc
|
|
value: [0.72548957 0.76710504 0.85389622 0.93292554 0.92769572 0.69709662
|
|
0.66867145 0.72588852 0.92048062 0.8179781 ]
|
|
|
|
mean value: 0.8037227392706445
|
|
|
|
key: test_accuracy
|
|
value: [0.62790698 0.72093023 0.60465116 0.76744186 0.65116279 0.6744186
|
|
0.62790698 0.76744186 0.58139535 0.60465116]
|
|
|
|
mean value: 0.6627906976744186
|
|
|
|
key: train_accuracy
|
|
value: [0.84496124 0.87855297 0.92248062 0.96640827 0.96382429 0.82687339
|
|
0.80878553 0.84496124 0.95865633 0.90180879]
|
|
|
|
mean value: 0.8917312661498707
|
|
|
|
key: test_fscore
|
|
value: [0.68 0.68421053 0.65306122 0.76190476 0.61538462 0.74074074
|
|
0.68 0.7826087 0.66666667 0.62222222]
|
|
|
|
mean value: 0.6886799453376766
|
|
|
|
key: train_fscore
|
|
value: [0.86607143 0.86834734 0.92788462 0.96675192 0.96410256 0.85209713
|
|
0.83913043 0.86547085 0.960199 0.90995261]
|
|
|
|
mean value: 0.9020007893806317
|
|
|
|
key: test_precision
|
|
value: [0.5862069 0.76470588 0.57142857 0.76190476 0.66666667 0.625
|
|
0.60714286 0.75 0.5625 0.60869565]
|
|
|
|
mean value: 0.6504251288221435
|
|
|
|
key: train_precision
|
|
value: [0.76377953 0.95092025 0.86936937 0.95939086 0.95918367 0.74230769
|
|
0.72284644 0.76284585 0.92344498 0.83842795]
|
|
|
|
mean value: 0.8492516586473186
|
|
|
|
key: test_recall
|
|
value: [0.80952381 0.61904762 0.76190476 0.76190476 0.57142857 0.90909091
|
|
0.77272727 0.81818182 0.81818182 0.63636364]
|
|
|
|
mean value: 0.7478354978354979
|
|
|
|
key: train_recall
|
|
value: [1. 0.79896907 0.99484536 0.9742268 0.96907216 1.
|
|
1. 1. 1. 0.99481865]
|
|
|
|
mean value: 0.9731932054911596
|
|
|
|
key: test_roc_auc
|
|
value: [0.63203463 0.71861472 0.60822511 0.76731602 0.64935065 0.66883117
|
|
0.62445887 0.76623377 0.57575758 0.6038961 ]
|
|
|
|
mean value: 0.6614718614718614
|
|
|
|
key: train_roc_auc
|
|
value: [0.84455959 0.87875915 0.92229315 0.96638801 0.96381069 0.82731959
|
|
0.80927835 0.84536082 0.95876289 0.9020485 ]
|
|
|
|
mean value: 0.8918580738208429
|
|
|
|
key: test_jcc
|
|
value: [0.51515152 0.52 0.48484848 0.61538462 0.44444444 0.58823529
|
|
0.51515152 0.64285714 0.5 0.4516129 ]
|
|
|
|
mean value: 0.5277685915181172
|
|
|
|
key: train_jcc
|
|
value: [0.76377953 0.76732673 0.86547085 0.93564356 0.93069307 0.74230769
|
|
0.72284644 0.76284585 0.92344498 0.83478261]
|
|
|
|
mean value: 0.8249141314743462
|
|
|
|
MCC on Blind test: 0.07
|
|
|
|
Accuracy on Blind test: 0.55
|
|
|
|
Model_name: Ridge Classifier
|
|
Model func: RidgeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02470899 0.03221989 0.03314185 0.0360744 0.03333807 0.03217483
|
|
0.03215766 0.03216076 0.03210974 0.032897 ]
|
|
|
|
mean value: 0.03209831714630127
|
|
|
|
key: score_time
|
|
value: [0.02296257 0.02556992 0.02539229 0.02259994 0.02406454 0.02528095
|
|
0.02319956 0.02534366 0.02206135 0.02454829]
|
|
|
|
mean value: 0.024102306365966795
|
|
|
|
key: test_mcc
|
|
value: [0.58225108 0.4633482 0.4912706 0.3961039 0.4633482 0.64040632
|
|
0.63123793 0.67532468 0.35185603 0.32463131]
|
|
|
|
mean value: 0.5019778248370252
|
|
|
|
key: train_mcc
|
|
value: [0.76230669 0.76231938 0.76227231 0.74163306 0.73220717 0.74686824
|
|
0.73143499 0.77778965 0.77786089 0.74226246]
|
|
|
|
mean value: 0.7536954854633586
|
|
|
|
key: test_accuracy
|
|
value: [0.79069767 0.72093023 0.74418605 0.69767442 0.72093023 0.81395349
|
|
0.81395349 0.8372093 0.6744186 0.65116279]
|
|
|
|
mean value: 0.7465116279069768
|
|
|
|
key: train_accuracy
|
|
value: [0.88113695 0.88113695 0.88113695 0.87080103 0.86563307 0.87338501
|
|
0.86563307 0.88888889 0.88888889 0.87080103]
|
|
|
|
mean value: 0.8767441860465116
|
|
|
|
key: test_fscore
|
|
value: [0.79069767 0.75 0.71794872 0.69767442 0.75 0.8
|
|
0.80952381 0.8372093 0.70833333 0.59459459]
|
|
|
|
mean value: 0.7455981850749293
|
|
|
|
key: train_fscore
|
|
value: /home/tanu/git/LSHTM_analysis/scripts/ml/./pnca_cd_sl.py:196: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
rouC_CT.sort_values(by = ['test_mcc'], ascending = False, inplace = True)
|
|
/home/tanu/git/LSHTM_analysis/scripts/ml/./pnca_cd_sl.py:199: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
rouC_BT.sort_values(by = ['bts_mcc'], ascending = False, inplace = True)
|
|
[0.88205128 0.88082902 0.8814433 0.87179487 0.86934673 0.87403599
|
|
0.86666667 0.88888889 0.88772846 0.87309645]
|
|
|
|
mean value: 0.8775881653530921
|
|
|
|
key: test_precision
|
|
value: [0.77272727 0.66666667 0.77777778 0.68181818 0.66666667 0.88888889
|
|
0.85 0.85714286 0.65384615 0.73333333]
|
|
|
|
mean value: 0.7548867798867799
|
|
|
|
key: train_precision
|
|
value: [0.87755102 0.88541667 0.8814433 0.86734694 0.84803922 0.86734694
|
|
0.85786802 0.88659794 0.89473684 0.85572139]
|
|
|
|
mean value: 0.8722068272870185
|
|
|
|
key: test_recall
|
|
value: [0.80952381 0.85714286 0.66666667 0.71428571 0.85714286 0.72727273
|
|
0.77272727 0.81818182 0.77272727 0.5 ]
|
|
|
|
mean value: 0.7495670995670995
|
|
|
|
key: train_recall
|
|
value: [0.88659794 0.87628866 0.8814433 0.87628866 0.89175258 0.88082902
|
|
0.87564767 0.89119171 0.88082902 0.89119171]
|
|
|
|
mean value: 0.8832060253191603
|
|
|
|
key: test_roc_auc
|
|
value: [0.79112554 0.72402597 0.74242424 0.69805195 0.72402597 0.81601732
|
|
0.81493506 0.83766234 0.67207792 0.6547619 ]
|
|
|
|
mean value: 0.7475108225108226
|
|
|
|
key: train_roc_auc
|
|
value: [0.8811228 0.88114951 0.88113616 0.87078682 0.86556541 0.8734042
|
|
0.86565889 0.88889482 0.88886812 0.87085359]
|
|
|
|
mean value: 0.8767440307675872
|
|
|
|
key: test_jcc
|
|
value: [0.65384615 0.6 0.56 0.53571429 0.6 0.66666667
|
|
0.68 0.72 0.5483871 0.42307692]
|
|
|
|
mean value: 0.5987691126078223
|
|
|
|
key: train_jcc
|
|
value: [0.78899083 0.78703704 0.78801843 0.77272727 0.76888889 0.77625571
|
|
0.76470588 0.8 0.79812207 0.77477477]
|
|
|
|
mean value: 0.7819520888138968
|
|
|
|
MCC on Blind test: 0.4
|
|
|
|
Accuracy on Blind test: 0.7
|
|
|
|
Model_name: Ridge ClassifierCV
|
|
Model func: RidgeClassifierCV(cv=10)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifierCV(cv=10))])
|
|
|
|
key: fit_time
|
|
value: [0.32377195 0.44215226 0.36714745 0.3412354 0.3573339 0.34631348
|
|
0.37735128 0.42175651 0.370193 0.30671382]
|
|
|
|
mean value: 0.3653969049453735
|
|
|
|
key: score_time
|
|
value: [0.02374911 0.02069783 0.0234592 0.02423143 0.02390671 0.02344298
|
|
0.0230155 0.0243516 0.02337193 0.02246904]
|
|
|
|
mean value: 0.02326953411102295
|
|
|
|
key: test_mcc
|
|
value: [0.62770563 0.49456394 0.67462198 0.44468651 0.62770563 0.67532468
|
|
0.77418983 0.72451364 0.30265778 0.3030303 ]
|
|
|
|
mean value: 0.5648999907145589
|
|
|
|
key: train_mcc
|
|
value: [0.7002564 0.70030181 0.65903507 0.69005132 0.66414682 0.67974678
|
|
0.65398592 0.69510176 0.69557211 0.7059139 ]
|
|
|
|
mean value: 0.6844111884571173
|
|
|
|
key: test_accuracy
|
|
value: [0.81395349 0.74418605 0.8372093 0.72093023 0.81395349 0.8372093
|
|
0.88372093 0.86046512 0.65116279 0.65116279]
|
|
|
|
mean value: 0.7813953488372093
|
|
|
|
key: train_accuracy
|
|
value: [0.8501292 0.8501292 0.82945736 0.84496124 0.83204134 0.83979328
|
|
0.82687339 0.84754522 0.84754522 0.85271318]
|
|
|
|
mean value: 0.8421188630490956
|
|
|
|
key: test_fscore
|
|
value: [0.80952381 0.75555556 0.82926829 0.72727273 0.80952381 0.8372093
|
|
0.89361702 0.85714286 0.68085106 0.65116279]
|
|
|
|
mean value: 0.7851127229831325
|
|
|
|
key: train_fscore
|
|
value: [0.85051546 0.84974093 0.83163265 0.84693878 0.83375959 0.84102564
|
|
0.8286445 0.84754522 0.84987277 0.85496183]
|
|
|
|
mean value: 0.8434637383464901
|
|
|
|
key: test_precision
|
|
value: [0.80952381 0.70833333 0.85 0.69565217 0.80952381 0.85714286
|
|
0.84 0.9 0.64 0.66666667]
|
|
|
|
mean value: 0.7776842650103519
|
|
|
|
key: train_precision
|
|
value: [0.85051546 0.85416667 0.82323232 0.83838384 0.82741117 0.83248731
|
|
0.81818182 0.84536082 0.835 0.84 ]
|
|
|
|
mean value: 0.8364739412281801
|
|
|
|
key: test_recall
|
|
value: [0.80952381 0.80952381 0.80952381 0.76190476 0.80952381 0.81818182
|
|
0.95454545 0.81818182 0.72727273 0.63636364]
|
|
|
|
mean value: 0.7954545454545454
|
|
|
|
key: train_recall
|
|
value: [0.85051546 0.84536082 0.84020619 0.8556701 0.84020619 0.84974093
|
|
0.83937824 0.84974093 0.86528497 0.87046632]
|
|
|
|
mean value: 0.8506570161850329
|
|
|
|
key: test_roc_auc
|
|
value: [0.81385281 0.745671 0.83658009 0.72186147 0.81385281 0.83766234
|
|
0.88203463 0.86147186 0.64935065 0.65151515]
|
|
|
|
mean value: 0.7813852813852814
|
|
|
|
key: train_roc_auc
|
|
value: [0.8501282 0.85014155 0.82942952 0.8449335 0.83202019 0.83981892
|
|
0.82690561 0.84755088 0.84759094 0.85275893]
|
|
|
|
mean value: 0.8421278243683564
|
|
|
|
key: test_jcc
|
|
value: [0.68 0.60714286 0.70833333 0.57142857 0.68 0.72
|
|
0.80769231 0.75 0.51612903 0.48275862]
|
|
|
|
mean value: 0.6523484722544789
|
|
|
|
key: train_jcc
|
|
value: [0.73991031 0.73873874 0.71179039 0.73451327 0.71491228 0.72566372
|
|
0.70742358 0.73542601 0.73893805 0.74666667]
|
|
|
|
mean value: 0.7293983027024029
|
|
|
|
MCC on Blind test: 0.39
|
|
|
|
Accuracy on Blind test: 0.7
|