19376 lines
946 KiB
Text
19376 lines
946 KiB
Text
/home/tanu/git/LSHTM_analysis/scripts/ml/ml_data_cd_8020.py:548: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
mask_check.sort_values(by = ['ligand_distance'], ascending = True, inplace = True)
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/xgboost/compat.py:36: FutureWarning: pandas.Int64Index is deprecated and will be removed from pandas in a future version. Use pandas.Index with the appropriate dtype instead.
|
|
from pandas import MultiIndex, Int64Index
|
|
1.22.4
|
|
1.4.1
|
|
|
|
aaindex_df contains non-numerical data
|
|
|
|
Total no. of non-numerial columns: 2
|
|
|
|
Selecting numerical data only
|
|
|
|
PASS: successfully selected numerical columns only for aaindex_df
|
|
|
|
Now checking for NA in the remaining aaindex_cols
|
|
|
|
Counting aaindex_df cols with NA
|
|
ncols with NA: 4 columns
|
|
Dropping these...
|
|
Original ncols: 127
|
|
|
|
Revised df ncols: 123
|
|
|
|
Checking NA in revised df...
|
|
|
|
PASS: cols with NA successfully dropped from aaindex_df
|
|
Proceeding with combining aa_df with other features_df
|
|
|
|
PASS: ncols match
|
|
Expected ncols: 123
|
|
Got: 123
|
|
|
|
Total no. of columns in clean aa_df: 123
|
|
|
|
Proceeding to merge, expected nrows in merged_df: 424
|
|
|
|
PASS: my_features_df and aa_df successfully combined
|
|
nrows: 424
|
|
ncols: 265
|
|
count of NULL values before imputation
|
|
|
|
or_mychisq 102
|
|
log10_or_mychisq 102
|
|
dtype: int64
|
|
count of NULL values AFTER imputation
|
|
|
|
mutationinformation 0
|
|
or_rawI 0
|
|
logorI 0
|
|
dtype: int64
|
|
|
|
PASS: OR values imputed, data ready for ML
|
|
|
|
Total no. of features for aaindex: 123
|
|
|
|
No. of numerical features: 166
|
|
No. of categorical features: 7
|
|
|
|
PASS: x_features has no target variable
|
|
|
|
No. of columns for x_features: 173
|
|
|
|
-------------------------------------------------------------
|
|
Successfully split data with stratification [COMPLETE data]: 80/20
|
|
Original data size: (424, 173)
|
|
Train data size: (339, 173)
|
|
Test data size: (85, 173)
|
|
y_train numbers: Counter({1: 186, 0: 153})
|
|
y_train ratio: 0.8225806451612904
|
|
|
|
y_test_numbers: Counter({1: 47, 0: 38})
|
|
y_test ratio: 0.8085106382978723
|
|
-------------------------------------------------------------
|
|
|
|
index: 0
|
|
ind: 1
|
|
|
|
Mask count check: True
|
|
Original Data
|
|
Counter({1: 186, 0: 153}) Data dim: (339, 173)
|
|
|
|
Simple Random OverSampling
|
|
Counter({0: 186, 1: 186})
|
|
(372, 173)
|
|
|
|
Simple Random UnderSampling
|
|
Counter({0: 153, 1: 153})
|
|
(306, 173)
|
|
|
|
Simple Combined Over and UnderSampling
|
|
Counter({0: 186, 1: 186})
|
|
(372, 173)
|
|
|
|
SMOTE_NC OverSampling
|
|
Counter({0: 186, 1: 186})
|
|
(372, 173)
|
|
|
|
#####################################################################
|
|
|
|
Running ML analysis [COMPLETE DATA]: 80/20 split
|
|
Gene name: pncA
|
|
Drug name: pyrazinamide
|
|
|
|
Output directory: /home/tanu/git/Data/pyrazinamide/output/ml/tts_cd_8020/
|
|
|
|
Sanity checks:
|
|
Total input features: 173
|
|
|
|
Training data size: (339, 173)
|
|
Test data size: (85, 173)
|
|
|
|
Target feature numbers (training data): Counter({1: 186, 0: 153})
|
|
Target features ratio (training data: 0.8225806451612904
|
|
|
|
Target feature numbers (test data): Counter({1: 47, 0: 38})
|
|
Target features ratio (test data): 0.8085106382978723
|
|
|
|
#####################################################################
|
|
|
|
|
|
================================================================
|
|
|
|
Strucutral features (n): 34
|
|
These are:
|
|
Common stablity features: ['ligand_distance', 'ligand_affinity_change', 'duet_stability_change', 'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts']
|
|
FoldX columns: ['electro_rr', 'electro_mm', 'electro_sm', 'electro_ss', 'disulfide_rr', 'disulfide_mm', 'disulfide_sm', 'disulfide_ss', 'hbonds_rr', 'hbonds_mm', 'hbonds_sm', 'hbonds_ss', 'partcov_rr', 'partcov_mm', 'partcov_sm', 'partcov_ss', 'vdwclashes_rr', 'vdwclashes_mm', 'vdwclashes_sm', 'vdwclashes_ss', 'volumetric_rr', 'volumetric_mm', 'volumetric_ss']
|
|
Other struc columns: ['rsa', 'kd_values', 'rd_values']
|
|
================================================================
|
|
|
|
AAindex features (n): 123
|
|
These are:
|
|
['ALTS910101', 'AZAE970101', 'AZAE970102', 'BASU010101', 'BENS940101', 'BENS940102', 'BENS940103', 'BENS940104', 'BETM990101', 'BLAJ010101', 'BONM030101', 'BONM030102', 'BONM030103', 'BONM030104', 'BONM030105', 'BONM030106', 'BRYS930101', 'CROG050101', 'CSEM940101', 'DAYM780301', 'DAYM780302', 'DOSZ010101', 'DOSZ010102', 'DOSZ010103', 'DOSZ010104', 'FEND850101', 'FITW660101', 'GEOD900101', 'GIAG010101', 'GONG920101', 'GRAR740104', 'HENS920101', 'HENS920102', 'HENS920103', 'HENS920104', 'JOHM930101', 'JOND920103', 'JOND940101', 'KANM000101', 'KAPO950101', 'KESO980101', 'KESO980102', 'KOLA920101', 'KOLA930101', 'KOSJ950100_RSA_SST', 'KOSJ950100_SST', 'KOSJ950110_RSA', 'KOSJ950115', 'LEVJ860101', 'LINK010101', 'LIWA970101', 'LUTR910101', 'LUTR910102', 'LUTR910103', 'LUTR910104', 'LUTR910105', 'LUTR910106', 'LUTR910107', 'LUTR910108', 'LUTR910109', 'MCLA710101', 'MCLA720101', 'MEHP950102', 'MICC010101', 'MIRL960101', 'MIYS850102', 'MIYS850103', 'MIYS930101', 'MIYS960101', 'MIYS960102', 'MIYS960103', 'MIYS990106', 'MIYS990107', 'MIYT790101', 'MOHR870101', 'MOOG990101', 'MUET010101', 'MUET020101', 'MUET020102', 'NAOD960101', 'NGPC000101', 'NIEK910101', 'NIEK910102', 'OGAK980101', 'OVEJ920100_RSA', 'OVEJ920101', 'OVEJ920102', 'OVEJ920103', 'PRLA000101', 'PRLA000102', 'QUIB020101', 'QU_C930101', 'QU_C930102', 'QU_C930103', 'RIER950101', 'RISJ880101', 'RUSR970101', 'RUSR970102', 'RUSR970103', 'SIMK990101', 'SIMK990102', 'SIMK990103', 'SIMK990104', 'SIMK990105', 'SKOJ000101', 'SKOJ000102', 'SKOJ970101', 'TANS760101', 'TANS760102', 'THOP960101', 'TOBD000101', 'TOBD000102', 'TUDE900101', 'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101', 'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106']
|
|
================================================================
|
|
|
|
Evolutionary features (n): 3
|
|
These are:
|
|
['consurf_score', 'snap2_score', 'provean_score']
|
|
================================================================
|
|
|
|
Genomic features (n): 6
|
|
These are:
|
|
['maf', 'logorI']
|
|
['lineage_proportion', 'dist_lineage_proportion', 'lineage_count_all', 'lineage_count_unique']
|
|
================================================================
|
|
|
|
Categorical features (n): 7
|
|
These are:
|
|
['ss_class', 'aa_prop_change', 'electrostatics_change', 'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site']
|
|
================================================================
|
|
|
|
|
|
Pass: No. of features match
|
|
|
|
#####################################################################
|
|
|
|
|
|
Model_name: Logistic Regression
|
|
Model func: LogisticRegression(random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegression(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.03504539 0.03420782 0.03705001 0.04274511 0.03523779 0.03432298
|
|
0.03052521 0.03545332 0.03375626 0.04529309]
|
|
|
|
mean value: 0.03636369705200195
|
|
|
|
key: score_time
|
|
value: [0.01234341 0.01193953 0.01209688 0.01214099 0.01208639 0.01200986
|
|
0.01195288 0.01204371 0.0153923 0.01720619]
|
|
|
|
mean value: 0.01292121410369873
|
|
|
|
key: test_mcc
|
|
value: [0.58925565 0.6846532 0.48168199 0.51983348 0.4677202 0.51983348
|
|
0.28421053 0.34904492 0.27185678 0.51111111]
|
|
|
|
mean value: 0.46792013458989595
|
|
|
|
key: train_mcc
|
|
value: [0.69496513 0.68791423 0.66795034 0.68173317 0.68838937 0.7219203
|
|
0.7017901 0.7017901 0.7017901 0.68267465]
|
|
|
|
mean value: 0.6930917476333396
|
|
|
|
key: test_accuracy
|
|
value: [0.79411765 0.82352941 0.73529412 0.76470588 0.73529412 0.76470588
|
|
0.64705882 0.67647059 0.64705882 0.75757576]
|
|
|
|
mean value: 0.7345811051693405
|
|
|
|
key: train_accuracy
|
|
value: [0.84918033 0.84590164 0.83606557 0.84262295 0.84590164 0.86229508
|
|
0.85245902 0.85245902 0.85245902 0.84313725]
|
|
|
|
mean value: 0.8482481517197042
|
|
|
|
key: test_fscore
|
|
value: [0.8 0.85714286 0.72727273 0.8 0.75675676 0.8
|
|
0.68421053 0.7027027 0.71428571 0.77777778]
|
|
|
|
mean value: 0.7620149062254326
|
|
|
|
key: train_fscore
|
|
value: [0.86781609 0.86217009 0.85549133 0.85964912 0.86135693 0.87790698
|
|
0.86880466 0.86880466 0.86880466 0.86127168]
|
|
|
|
mean value: 0.8652076211584602
|
|
|
|
key: test_precision
|
|
value: [0.82352941 0.75 0.8 0.76190476 0.77777778 0.76190476
|
|
0.68421053 0.72222222 0.65217391 0.77777778]
|
|
|
|
mean value: 0.7511501152711275
|
|
|
|
key: train_precision
|
|
value: [0.83888889 0.84971098 0.83146067 0.84 0.84883721 0.85310734
|
|
0.84659091 0.84659091 0.84659091 0.83707865]
|
|
|
|
mean value: 0.8438856478598367
|
|
|
|
key: test_recall
|
|
value: [0.77777778 1. 0.66666667 0.84210526 0.73684211 0.84210526
|
|
0.68421053 0.68421053 0.78947368 0.77777778]
|
|
|
|
mean value: 0.7801169590643274
|
|
|
|
key: train_recall
|
|
value: [0.89880952 0.875 0.88095238 0.88023952 0.8742515 0.90419162
|
|
0.89221557 0.89221557 0.89221557 0.88690476]
|
|
|
|
mean value: 0.8876996007984033
|
|
|
|
key: test_roc_auc
|
|
value: [0.79513889 0.8125 0.73958333 0.75438596 0.73508772 0.75438596
|
|
0.64210526 0.6754386 0.62807018 0.75555556]
|
|
|
|
mean value: 0.7292251461988304
|
|
|
|
key: train_roc_auc
|
|
value: [0.84356535 0.84260949 0.83098714 0.83867049 0.84292285 0.85789291
|
|
0.8482817 0.8482817 0.8482817 0.83837992]
|
|
|
|
mean value: 0.8439873228788377
|
|
|
|
key: test_jcc
|
|
value: [0.66666667 0.75 0.57142857 0.66666667 0.60869565 0.66666667
|
|
0.52 0.54166667 0.55555556 0.63636364]
|
|
|
|
mean value: 0.6183710082188343
|
|
|
|
key: train_jcc
|
|
value: [0.76649746 0.75773196 0.74747475 0.75384615 0.75647668 0.78238342
|
|
0.76804124 0.76804124 0.76804124 0.75634518]
|
|
|
|
mean value: 0.7624879314644846
|
|
|
|
MCC on Blind test: 0.5
|
|
|
|
Accuracy on Blind test: 0.75
|
|
|
|
Model_name: Logistic RegressionCV
|
|
Model func: LogisticRegressionCV(random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegressionCV(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.44330192 1.09825206 0.76107049 0.75766945 1.12681198 1.15268183
|
|
0.95372033 0.79295349 0.7549293 0.91932583]
|
|
|
|
mean value: 0.9760716676712036
|
|
|
|
key: score_time
|
|
value: [0.01216173 0.01207638 0.0119741 0.0119977 0.01245975 0.01583004
|
|
0.01237631 0.01201558 0.01495123 0.0152967 ]
|
|
|
|
mean value: 0.013113951683044434
|
|
|
|
key: test_mcc
|
|
value: [0.47075654 0.60755744 0.52822141 0.52509323 0.58055371 0.58055371
|
|
0.23036965 0.46019501 0.21022326 0.3985267 ]
|
|
|
|
mean value: 0.4592050655256825
|
|
|
|
key: train_mcc
|
|
value: [0.52253387 0.61449375 0.50226813 0.56204732 0.54322557 0.76845863
|
|
0.64884125 0.64201105 0.7749747 0.74897428]
|
|
|
|
mean value: 0.632782856068499
|
|
|
|
key: test_accuracy
|
|
value: [0.73529412 0.79411765 0.76470588 0.76470588 0.79411765 0.79411765
|
|
0.61764706 0.73529412 0.61764706 0.6969697 ]
|
|
|
|
mean value: 0.7314616755793226
|
|
|
|
key: train_accuracy
|
|
value: [0.76393443 0.80983607 0.75409836 0.78360656 0.77377049 0.8852459
|
|
0.82622951 0.82295082 0.88852459 0.87581699]
|
|
|
|
mean value: 0.8184013714775528
|
|
|
|
key: test_fscore
|
|
value: [0.76923077 0.82926829 0.78947368 0.80952381 0.82051282 0.82051282
|
|
0.64864865 0.76923077 0.72340426 0.70588235]
|
|
|
|
mean value: 0.7685688222813416
|
|
|
|
key: train_fscore
|
|
value: [0.80110497 0.83139535 0.79338843 0.81142857 0.80672269 0.89795918
|
|
0.84726225 0.84393064 0.9005848 0.88953488]
|
|
|
|
mean value: 0.8423311757861972
|
|
|
|
key: test_precision
|
|
value: [0.71428571 0.73913043 0.75 0.73913043 0.8 0.8
|
|
0.66666667 0.75 0.60714286 0.75 ]
|
|
|
|
mean value: 0.7316356107660456
|
|
|
|
key: train_precision
|
|
value: [0.74742268 0.8125 0.73846154 0.77595628 0.75789474 0.875
|
|
0.81666667 0.81564246 0.88 0.86931818]
|
|
|
|
mean value: 0.8088862546454427
|
|
|
|
key: test_recall
|
|
value: [0.83333333 0.94444444 0.83333333 0.89473684 0.84210526 0.84210526
|
|
0.63157895 0.78947368 0.89473684 0.66666667]
|
|
|
|
mean value: 0.8172514619883041
|
|
|
|
key: train_recall
|
|
value: [0.86309524 0.85119048 0.85714286 0.8502994 0.86227545 0.92215569
|
|
0.88023952 0.8742515 0.92215569 0.91071429]
|
|
|
|
mean value: 0.879352010265184
|
|
|
|
key: test_roc_auc
|
|
value: [0.72916667 0.78472222 0.76041667 0.74736842 0.7877193 0.7877193
|
|
0.61578947 0.72807018 0.58070175 0.7 ]
|
|
|
|
mean value: 0.7221673976608187
|
|
|
|
key: train_roc_auc
|
|
value: [0.7527155 0.80515728 0.74244004 0.77659898 0.76447106 0.8813677
|
|
0.82055454 0.81756053 0.88499089 0.87202381]
|
|
|
|
mean value: 0.811788033060234
|
|
|
|
key: test_jcc
|
|
value: [0.625 0.70833333 0.65217391 0.68 0.69565217 0.69565217
|
|
0.48 0.625 0.56666667 0.54545455]
|
|
|
|
mean value: 0.6273932806324111
|
|
|
|
key: train_jcc
|
|
value: [0.66820276 0.71144279 0.65753425 0.68269231 0.67605634 0.81481481
|
|
0.735 0.73 0.81914894 0.80104712]
|
|
|
|
mean value: 0.7295939314746305
|
|
|
|
MCC on Blind test: 0.5
|
|
|
|
Accuracy on Blind test: 0.75
|
|
|
|
Model_name: Gaussian NB
|
|
Model func: GaussianNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01692581 0.01213837 0.01068115 0.00934362 0.00922394 0.00924301
|
|
0.00924134 0.0093658 0.00914884 0.00920057]
|
|
|
|
mean value: 0.010451245307922363
|
|
|
|
key: score_time
|
|
value: [0.01336455 0.01001644 0.00986552 0.00880075 0.00871325 0.00873637
|
|
0.00874019 0.00875568 0.00870728 0.00868845]
|
|
|
|
mean value: 0.009438848495483399
|
|
|
|
key: test_mcc
|
|
value: [ 0.28876104 0.10372932 0.47075654 0.46647866 0.39847501 0.46647866
|
|
-0.03645655 0.15415752 0.52509323 0.12909944]
|
|
|
|
mean value: 0.2966572863448208
|
|
|
|
key: train_mcc
|
|
value: [0.38310013 0.40906281 0.38224558 0.41012686 0.37946412 0.39096197
|
|
0.42884262 0.40945545 0.4138192 0.45261556]
|
|
|
|
mean value: 0.4059694298389224
|
|
|
|
key: test_accuracy
|
|
value: [0.64705882 0.55882353 0.73529412 0.73529412 0.70588235 0.73529412
|
|
0.5 0.58823529 0.76470588 0.57575758]
|
|
|
|
mean value: 0.6546345811051694
|
|
|
|
key: train_accuracy
|
|
value: [0.69508197 0.70819672 0.69508197 0.70819672 0.68196721 0.69836066
|
|
0.71803279 0.70819672 0.70819672 0.72875817]
|
|
|
|
mean value: 0.7050069645344477
|
|
|
|
key: test_fscore
|
|
value: [0.68421053 0.63414634 0.76923077 0.79069767 0.76190476 0.79069767
|
|
0.58536585 0.65 0.80952381 0.65 ]
|
|
|
|
mean value: 0.7125777410934291
|
|
|
|
key: train_fscore
|
|
value: [0.75590551 0.76266667 0.75461741 0.76010782 0.76167076 0.75531915
|
|
0.76373626 0.75880759 0.7651715 0.75223881]
|
|
|
|
mean value: 0.759024148178431
|
|
|
|
key: test_precision
|
|
value: [0.65 0.56521739 0.71428571 0.70833333 0.69565217 0.70833333
|
|
0.54545455 0.61904762 0.73913043 0.59090909]
|
|
|
|
mean value: 0.6536363636363637
|
|
|
|
key: train_precision
|
|
value: [0.67605634 0.69082126 0.67772512 0.69117647 0.64583333 0.67942584
|
|
0.70558376 0.69306931 0.68396226 0.75449102]
|
|
|
|
mean value: 0.6898144699183257
|
|
|
|
key: test_recall
|
|
value: [0.72222222 0.72222222 0.83333333 0.89473684 0.84210526 0.89473684
|
|
0.63157895 0.68421053 0.89473684 0.72222222]
|
|
|
|
mean value: 0.7842105263157895
|
|
|
|
key: train_recall
|
|
value: [0.85714286 0.85119048 0.85119048 0.84431138 0.92814371 0.8502994
|
|
0.83233533 0.83832335 0.86826347 0.75 ]
|
|
|
|
mean value: 0.8471200456230397
|
|
|
|
key: test_roc_auc
|
|
value: [0.64236111 0.54861111 0.72916667 0.71403509 0.6877193 0.71403509
|
|
0.48245614 0.5754386 0.74736842 0.56111111]
|
|
|
|
mean value: 0.6402302631578948
|
|
|
|
key: train_roc_auc
|
|
value: [0.67674661 0.6920186 0.67742006 0.69389482 0.65610084 0.68239608
|
|
0.70602274 0.694524 0.69137811 0.72644928]
|
|
|
|
mean value: 0.6896951122011316
|
|
|
|
key: test_jcc
|
|
value: [0.52 0.46428571 0.625 0.65384615 0.61538462 0.65384615
|
|
0.4137931 0.48148148 0.68 0.48148148]
|
|
|
|
mean value: 0.5589118703773877
|
|
|
|
key: train_jcc
|
|
value: [0.60759494 0.61637931 0.6059322 0.61304348 0.61507937 0.60683761
|
|
0.61777778 0.61135371 0.61965812 0.60287081]
|
|
|
|
mean value: 0.611652732324478
|
|
|
|
MCC on Blind test: 0.46
|
|
|
|
Accuracy on Blind test: 0.73
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01088023 0.01055765 0.01056027 0.00973296 0.00953054 0.00951672
|
|
0.00946689 0.00963879 0.0095346 0.00948691]
|
|
|
|
mean value: 0.009890556335449219
|
|
|
|
key: score_time
|
|
value: [0.00957346 0.00956655 0.00960708 0.00879121 0.0087738 0.00882578
|
|
0.00878 0.00879264 0.00875068 0.00882196]
|
|
|
|
mean value: 0.009028315544128418
|
|
|
|
key: test_mcc
|
|
value: [0.53472222 0.29012943 0.47075654 0.39794149 0.4677202 0.39794149
|
|
0.07368421 0.16491228 0.46019501 0.38729833]
|
|
|
|
mean value: 0.36453012120102724
|
|
|
|
key: train_mcc
|
|
value: [0.44063855 0.50166137 0.48731932 0.46122141 0.47471905 0.45496383
|
|
0.50820014 0.48807757 0.48823787 0.46926792]
|
|
|
|
mean value: 0.47743070304044977
|
|
|
|
key: test_accuracy
|
|
value: [0.76470588 0.64705882 0.73529412 0.70588235 0.73529412 0.70588235
|
|
0.52941176 0.58823529 0.73529412 0.6969697 ]
|
|
|
|
mean value: 0.6844028520499109
|
|
|
|
key: train_accuracy
|
|
value: [0.72459016 0.75409836 0.74754098 0.73442623 0.74098361 0.73114754
|
|
0.75737705 0.74754098 0.74754098 0.73856209]
|
|
|
|
mean value: 0.7423807993142613
|
|
|
|
key: test_fscore
|
|
value: [0.76470588 0.7 0.76923077 0.75 0.75675676 0.75
|
|
0.52941176 0.63157895 0.76923077 0.75 ]
|
|
|
|
mean value: 0.717091488964554
|
|
|
|
key: train_fscore
|
|
value: [0.75722543 0.78005865 0.77809798 0.76790831 0.77233429 0.76162791
|
|
0.78735632 0.77936963 0.77681159 0.77142857]
|
|
|
|
mean value: 0.773221869261951
|
|
|
|
key: test_precision
|
|
value: [0.8125 0.63636364 0.71428571 0.71428571 0.77777778 0.71428571
|
|
0.6 0.63157895 0.75 0.68181818]
|
|
|
|
mean value: 0.703289568618516
|
|
|
|
key: train_precision
|
|
value: [0.73595506 0.76878613 0.75418994 0.73626374 0.74444444 0.74011299
|
|
0.75690608 0.74725275 0.75280899 0.74175824]
|
|
|
|
mean value: 0.7478478357663048
|
|
|
|
key: test_recall
|
|
value: [0.72222222 0.77777778 0.83333333 0.78947368 0.73684211 0.78947368
|
|
0.47368421 0.63157895 0.78947368 0.83333333]
|
|
|
|
mean value: 0.737719298245614
|
|
|
|
key: train_recall
|
|
value: [0.7797619 0.79166667 0.80357143 0.80239521 0.80239521 0.78443114
|
|
0.82035928 0.81437126 0.80239521 0.80357143]
|
|
|
|
mean value: 0.800491873396065
|
|
|
|
key: test_roc_auc
|
|
value: [0.76736111 0.63888889 0.72916667 0.69473684 0.73508772 0.69473684
|
|
0.53684211 0.58245614 0.72807018 0.68333333]
|
|
|
|
mean value: 0.6790679824561404
|
|
|
|
key: train_roc_auc
|
|
value: [0.71834811 0.74984793 0.74120177 0.72728456 0.73453094 0.7255489
|
|
0.75075935 0.74051896 0.74177731 0.73149586]
|
|
|
|
mean value: 0.7361313698938433
|
|
|
|
key: test_jcc
|
|
value: [0.61904762 0.53846154 0.625 0.6 0.60869565 0.6
|
|
0.36 0.46153846 0.625 0.6 ]
|
|
|
|
mean value: 0.5637743271221533
|
|
|
|
key: train_jcc
|
|
value: [0.60930233 0.63942308 0.63679245 0.62325581 0.62910798 0.61502347
|
|
0.6492891 0.63849765 0.63507109 0.62790698]
|
|
|
|
mean value: 0.6303669943587016
|
|
|
|
MCC on Blind test: 0.4
|
|
|
|
Accuracy on Blind test: 0.71
|
|
|
|
Model_name: K-Nearest Neighbors
|
|
Model func: KNeighborsClassifier()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', KNeighborsClassifier())])
|
|
|
|
key: fit_time
|
|
value: [0.00920081 0.0106101 0.01095128 0.01060462 0.01069379 0.01050758
|
|
0.01083946 0.01047349 0.01055217 0.009341 ]
|
|
|
|
mean value: 0.01037743091583252
|
|
|
|
key: score_time
|
|
value: [0.05469441 0.01746583 0.01230216 0.01333094 0.01750517 0.01473713
|
|
0.012887 0.01271653 0.01439023 0.01208997]
|
|
|
|
mean value: 0.018211936950683592
|
|
|
|
key: test_mcc
|
|
value: [ 0.29012943 0.23570226 0.35355339 -0.03645655 -0.0069809 0.13356983
|
|
0.11169438 0.2760495 0.29617444 0.44777366]
|
|
|
|
mean value: 0.21012094338891965
|
|
|
|
key: train_mcc
|
|
value: [0.55434058 0.51406107 0.51452664 0.50867652 0.5283618 0.53502157
|
|
0.52190532 0.5152887 0.50206797 0.53680825]
|
|
|
|
mean value: 0.5231058430051507
|
|
|
|
key: test_accuracy
|
|
value: [0.64705882 0.61764706 0.67647059 0.5 0.5 0.58823529
|
|
0.55882353 0.64705882 0.64705882 0.72727273]
|
|
|
|
mean value: 0.6109625668449198
|
|
|
|
key: train_accuracy
|
|
value: [0.78032787 0.76065574 0.76065574 0.75737705 0.76721311 0.7704918
|
|
0.76393443 0.76065574 0.75409836 0.77124183]
|
|
|
|
mean value: 0.7646651666130934
|
|
|
|
key: test_fscore
|
|
value: [0.7 0.69767442 0.68571429 0.58536585 0.54054054 0.69565217
|
|
0.59459459 0.7 0.66666667 0.75675676]
|
|
|
|
mean value: 0.6622965290449075
|
|
|
|
key: train_fscore
|
|
value: [0.80911681 0.79320113 0.79551821 0.79096045 0.7965616 0.79885057
|
|
0.79545455 0.79320113 0.78873239 0.80337079]
|
|
|
|
mean value: 0.7964967640300843
|
|
|
|
key: test_precision
|
|
value: [0.63636364 0.6 0.70588235 0.54545455 0.55555556 0.59259259
|
|
0.61111111 0.66666667 0.70588235 0.73684211]
|
|
|
|
mean value: 0.6356350918889618
|
|
|
|
key: train_precision
|
|
value: [0.77595628 0.75675676 0.75132275 0.7486631 0.76373626 0.7679558
|
|
0.75675676 0.75268817 0.74468085 0.7606383 ]
|
|
|
|
mean value: 0.7579155036413965
|
|
|
|
key: test_recall
|
|
value: [0.77777778 0.83333333 0.66666667 0.63157895 0.52631579 0.84210526
|
|
0.57894737 0.73684211 0.63157895 0.77777778]
|
|
|
|
mean value: 0.7002923976608187
|
|
|
|
key: train_recall
|
|
value: [0.8452381 0.83333333 0.8452381 0.83832335 0.83233533 0.83233533
|
|
0.83832335 0.83832335 0.83832335 0.85119048]
|
|
|
|
mean value: 0.8392964071856287
|
|
|
|
key: test_roc_auc
|
|
value: [0.63888889 0.60416667 0.67708333 0.48245614 0.49649123 0.55438596
|
|
0.55614035 0.63508772 0.64912281 0.72222222]
|
|
|
|
mean value: 0.6016045321637427
|
|
|
|
key: train_roc_auc
|
|
value: [0.77298401 0.75243309 0.7510862 0.74887182 0.76037056 0.76399375
|
|
0.7561182 0.75249501 0.74524863 0.76255176]
|
|
|
|
mean value: 0.7566153039842245
|
|
|
|
key: test_jcc
|
|
value: [0.53846154 0.53571429 0.52173913 0.4137931 0.37037037 0.53333333
|
|
0.42307692 0.53846154 0.5 0.60869565]
|
|
|
|
mean value: 0.4983645875474961
|
|
|
|
key: train_jcc
|
|
value: [0.67942584 0.657277 0.66046512 0.65420561 0.66190476 0.66507177
|
|
0.66037736 0.657277 0.65116279 0.6713615 ]
|
|
|
|
mean value: 0.6618528735461957
|
|
|
|
MCC on Blind test: 0.19
|
|
|
|
Accuracy on Blind test: 0.6
|
|
|
|
Model_name: SVM
|
|
Model func: SVC(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SVC(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01780105 0.01477504 0.0150125 0.01503873 0.01491594 0.01546621
|
|
0.01481795 0.01480412 0.01506019 0.01500607]
|
|
|
|
mean value: 0.015269780158996582
|
|
|
|
key: score_time
|
|
value: [0.01043487 0.0101912 0.01028466 0.01025176 0.01074624 0.01024556
|
|
0.01018739 0.0101676 0.01023769 0.01015687]
|
|
|
|
mean value: 0.010290384292602539
|
|
|
|
key: test_mcc
|
|
value: [0.53673944 0.53673944 0.52777778 0.65171146 0.45935257 0.40674623
|
|
0.09911893 0.2760495 0.39794149 0.51639778]
|
|
|
|
mean value: 0.44085746196887043
|
|
|
|
key: train_mcc
|
|
value: [0.63324644 0.63846618 0.65602773 0.66555159 0.66555159 0.69084462
|
|
0.66462713 0.68253944 0.67097764 0.66233521]
|
|
|
|
mean value: 0.6630167569871395
|
|
|
|
key: test_accuracy
|
|
value: [0.76470588 0.76470588 0.76470588 0.82352941 0.73529412 0.70588235
|
|
0.55882353 0.64705882 0.70588235 0.75757576]
|
|
|
|
mean value: 0.7228163992869875
|
|
|
|
key: train_accuracy
|
|
value: [0.81639344 0.81967213 0.8295082 0.83278689 0.83278689 0.84590164
|
|
0.83278689 0.84262295 0.83606557 0.83006536]
|
|
|
|
mean value: 0.8318589949641059
|
|
|
|
key: test_fscore
|
|
value: [0.8 0.8 0.77777778 0.85714286 0.7804878 0.77272727
|
|
0.61538462 0.7 0.75 0.8 ]
|
|
|
|
mean value: 0.7653520327910571
|
|
|
|
key: train_fscore
|
|
value: [0.84615385 0.84764543 0.85310734 0.85714286 0.85714286 0.86685552
|
|
0.85633803 0.86206897 0.85875706 0.85714286]
|
|
|
|
mean value: 0.8562354771490535
|
|
|
|
key: test_precision
|
|
value: [0.72727273 0.72727273 0.77777778 0.7826087 0.72727273 0.68
|
|
0.6 0.66666667 0.71428571 0.72727273]
|
|
|
|
mean value: 0.7130429763473242
|
|
|
|
key: train_precision
|
|
value: [0.78571429 0.79274611 0.81182796 0.80526316 0.80526316 0.82258065
|
|
0.80851064 0.82872928 0.81283422 0.79591837]
|
|
|
|
mean value: 0.8069387829655632
|
|
|
|
key: test_recall
|
|
value: [0.88888889 0.88888889 0.77777778 0.94736842 0.84210526 0.89473684
|
|
0.63157895 0.73684211 0.78947368 0.88888889]
|
|
|
|
mean value: 0.8286549707602339
|
|
|
|
key: train_recall
|
|
value: [0.91666667 0.91071429 0.89880952 0.91616766 0.91616766 0.91616766
|
|
0.91017964 0.89820359 0.91017964 0.92857143]
|
|
|
|
mean value: 0.9121827773025377
|
|
|
|
key: test_roc_auc
|
|
value: [0.75694444 0.75694444 0.76388889 0.80701754 0.72105263 0.68070175
|
|
0.54912281 0.63508772 0.69473684 0.74444444]
|
|
|
|
mean value: 0.7109941520467836
|
|
|
|
key: train_roc_auc
|
|
value: [0.80504866 0.80937174 0.82166754 0.82402586 0.82402586 0.83851861
|
|
0.82465504 0.83678296 0.82827823 0.81935818]
|
|
|
|
mean value: 0.8231732674199137
|
|
|
|
key: test_jcc
|
|
value: [0.66666667 0.66666667 0.63636364 0.75 0.64 0.62962963
|
|
0.44444444 0.53846154 0.6 0.66666667]
|
|
|
|
mean value: 0.6238899248899249
|
|
|
|
key: train_jcc
|
|
value: [0.73333333 0.73557692 0.74384236 0.75 0.75 0.765
|
|
0.74876847 0.75757576 0.75247525 0.75 ]
|
|
|
|
mean value: 0.748657209894919
|
|
|
|
MCC on Blind test: 0.33
|
|
|
|
Accuracy on Blind test: 0.67
|
|
|
|
Model_name: MLP
|
|
Model func: MLPClassifier(max_iter=500, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MLPClassifier(max_iter=500, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.32135892 1.20153308 1.87303424 1.63645196 1.36333013 1.62572312
|
|
1.56750178 1.19691849 1.34020376 1.23293519]
|
|
|
|
mean value: 1.4358990669250489
|
|
|
|
key: score_time
|
|
value: [0.01480174 0.01475239 0.01553249 0.01567817 0.01371741 0.01532006
|
|
0.0150578 0.01520371 0.01264095 0.01516676]
|
|
|
|
mean value: 0.01478714942932129
|
|
|
|
key: test_mcc
|
|
value: [0.64583333 0.66004445 0.53472222 0.52280702 0.4677202 0.33540029
|
|
0.28421053 0.53311399 0.20536002 0.38497419]
|
|
|
|
mean value: 0.4574186238402043
|
|
|
|
key: train_mcc
|
|
value: [0.97376902 0.96038262 0.97376902 0.99340041 0.97358333 0.96734255
|
|
0.98030477 0.98030477 0.97380732 0.98035626]
|
|
|
|
mean value: 0.9757020075835581
|
|
|
|
key: test_accuracy
|
|
value: [0.82352941 0.82352941 0.76470588 0.76470588 0.73529412 0.67647059
|
|
0.64705882 0.76470588 0.61764706 0.6969697 ]
|
|
|
|
mean value: 0.7314616755793226
|
|
|
|
key: train_accuracy
|
|
value: [0.98688525 0.98032787 0.98688525 0.99672131 0.98688525 0.98360656
|
|
0.99016393 0.99016393 0.98688525 0.99019608]
|
|
|
|
mean value: 0.9878720668595308
|
|
|
|
key: test_fscore
|
|
value: [0.83333333 0.85 0.76470588 0.78947368 0.75675676 0.73170732
|
|
0.68421053 0.77777778 0.71111111 0.73684211]
|
|
|
|
mean value: 0.7635918494194565
|
|
|
|
key: train_fscore
|
|
value: [0.98823529 0.98203593 0.98823529 0.99701493 0.98809524 0.98525074
|
|
0.99109792 0.99109792 0.98816568 0.99115044]
|
|
|
|
mean value: 0.9890379385959084
|
|
|
|
key: test_precision
|
|
value: [0.83333333 0.77272727 0.8125 0.78947368 0.77777778 0.68181818
|
|
0.68421053 0.82352941 0.61538462 0.7 ]
|
|
|
|
mean value: 0.7490754803332202
|
|
|
|
key: train_precision
|
|
value: [0.97674419 0.98795181 0.97674419 0.99404762 0.98224852 0.97093023
|
|
0.98235294 0.98235294 0.97660819 0.98245614]
|
|
|
|
mean value: 0.9812436761476078
|
|
|
|
key: test_recall
|
|
value: [0.83333333 0.94444444 0.72222222 0.78947368 0.73684211 0.78947368
|
|
0.68421053 0.73684211 0.84210526 0.77777778]
|
|
|
|
mean value: 0.7856725146198831
|
|
|
|
key: train_recall
|
|
value: [1. 0.97619048 1. 1. 0.99401198 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.997020245223838
|
|
|
|
key: test_roc_auc
|
|
value: [0.82291667 0.81597222 0.76736111 0.76140351 0.73508772 0.66140351
|
|
0.64210526 0.76842105 0.5877193 0.68888889]
|
|
|
|
mean value: 0.7251279239766082
|
|
|
|
key: train_roc_auc
|
|
value: [0.98540146 0.98079597 0.98540146 0.99637681 0.98613642 0.98188406
|
|
0.98913043 0.98913043 0.98550725 0.98913043]
|
|
|
|
mean value: 0.986889473082669
|
|
|
|
key: test_jcc
|
|
value: [0.71428571 0.73913043 0.61904762 0.65217391 0.60869565 0.57692308
|
|
0.52 0.63636364 0.55172414 0.58333333]
|
|
|
|
mean value: 0.6201677517884414
|
|
|
|
key: train_jcc
|
|
value: [0.97674419 0.96470588 0.97674419 0.99404762 0.97647059 0.97093023
|
|
0.98235294 0.98235294 0.97660819 0.98245614]
|
|
|
|
mean value: 0.9783412904125338
|
|
|
|
MCC on Blind test: 0.52
|
|
|
|
Accuracy on Blind test: 0.76
|
|
|
|
Model_name: Decision Tree
|
|
Model func: DecisionTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', DecisionTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02829695 0.0228796 0.02054882 0.02283907 0.01954627 0.02025723
|
|
0.02166963 0.02095771 0.01940727 0.02203822]
|
|
|
|
mean value: 0.021844077110290527
|
|
|
|
key: score_time
|
|
value: [0.01208591 0.00928068 0.00880885 0.00888658 0.0087049 0.00878859
|
|
0.00872684 0.0087254 0.00870728 0.0126493 ]
|
|
|
|
mean value: 0.009536433219909667
|
|
|
|
key: test_mcc
|
|
value: [0.65277778 0.47140452 0.29166667 0.41464421 0.4677202 0.59648091
|
|
0.23036965 0.34904492 0.23036965 0.51639778]
|
|
|
|
mean value: 0.42208762875142947
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.82352941 0.73529412 0.64705882 0.70588235 0.73529412 0.79411765
|
|
0.61764706 0.67647059 0.61764706 0.75757576]
|
|
|
|
mean value: 0.7110516934046346
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.82352941 0.74285714 0.66666667 0.72222222 0.75675676 0.8372093
|
|
0.64864865 0.7027027 0.64864865 0.8 ]
|
|
|
|
mean value: 0.7349241502593076
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.875 0.76470588 0.66666667 0.76470588 0.77777778 0.75
|
|
0.66666667 0.72222222 0.66666667 0.72727273]
|
|
|
|
mean value: 0.738168449197861
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.77777778 0.72222222 0.66666667 0.68421053 0.73684211 0.94736842
|
|
0.63157895 0.68421053 0.63157895 0.88888889]
|
|
|
|
mean value: 0.7371345029239766
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.82638889 0.73611111 0.64583333 0.70877193 0.73508772 0.77368421
|
|
0.61578947 0.6754386 0.61578947 0.74444444]
|
|
|
|
mean value: 0.7077339181286549
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.7 0.59090909 0.5 0.56521739 0.60869565 0.72
|
|
0.48 0.54166667 0.48 0.66666667]
|
|
|
|
mean value: 0.5853155467720685
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.4
|
|
|
|
Accuracy on Blind test: 0.71
|
|
|
|
Model_name: Extra Trees
|
|
Model func: ExtraTreesClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreesClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.12284851 0.1233356 0.12332082 0.12236309 0.12244511 0.11108303
|
|
0.11237359 0.12249541 0.12103868 0.11180711]
|
|
|
|
mean value: 0.11931109428405762
|
|
|
|
key: score_time
|
|
value: [0.0193882 0.01934218 0.0192337 0.01919889 0.01940441 0.0176692
|
|
0.01935768 0.01922607 0.01762199 0.01920247]
|
|
|
|
mean value: 0.018964481353759766
|
|
|
|
key: test_mcc
|
|
value: [0.4677202 0.41343443 0.36300672 0.60035727 0.52280702 0.39794149
|
|
0.40350877 0.51983348 0.15415752 0.32463591]
|
|
|
|
mean value: 0.4167402805388658
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.73529412 0.70588235 0.67647059 0.79411765 0.76470588 0.70588235
|
|
0.70588235 0.76470588 0.58823529 0.66666667]
|
|
|
|
mean value: 0.7107843137254902
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.75675676 0.75 0.66666667 0.8 0.78947368 0.75
|
|
0.73684211 0.8 0.65 0.7027027 ]
|
|
|
|
mean value: 0.740244191559981
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.73684211 0.68181818 0.73333333 0.875 0.78947368 0.71428571
|
|
0.73684211 0.76190476 0.61904762 0.68421053]
|
|
|
|
mean value: 0.7332758031442241
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.77777778 0.83333333 0.61111111 0.73684211 0.78947368 0.78947368
|
|
0.73684211 0.84210526 0.68421053 0.72222222]
|
|
|
|
mean value: 0.7523391812865498
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.73263889 0.69791667 0.68055556 0.80175439 0.76140351 0.69473684
|
|
0.70175439 0.75438596 0.5754386 0.66111111]
|
|
|
|
mean value: 0.7061695906432749
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.60869565 0.6 0.5 0.66666667 0.65217391 0.6
|
|
0.58333333 0.66666667 0.48148148 0.54166667]
|
|
|
|
mean value: 0.5900684380032206
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.33
|
|
|
|
Accuracy on Blind test: 0.67
|
|
|
|
Model_name: Extra Tree
|
|
Model func: ExtraTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01088667 0.01070976 0.0108645 0.01091766 0.01080966 0.01082206
|
|
0.01088333 0.01081562 0.01062369 0.01084924]
|
|
|
|
mean value: 0.010818219184875489
|
|
|
|
key: score_time
|
|
value: [0.00965977 0.0096581 0.00963473 0.00953102 0.00956631 0.00961757
|
|
0.00960183 0.00961995 0.0095768 0.00963449]
|
|
|
|
mean value: 0.00961005687713623
|
|
|
|
key: test_mcc
|
|
value: [ 0.18055556 0.59201201 0.2030906 0.43157895 0.21144801 -0.03645655
|
|
-0.02123977 0.14523855 0.2760495 0.22222222]
|
|
|
|
mean value: 0.22044990826698835
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.58823529 0.79411765 0.58823529 0.70588235 0.61764706 0.5
|
|
0.5 0.58823529 0.64705882 0.60606061]
|
|
|
|
mean value: 0.6135472370766488
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.58823529 0.82051282 0.53333333 0.70588235 0.68292683 0.58536585
|
|
0.56410256 0.66666667 0.7 0.60606061]
|
|
|
|
mean value: 0.6453086320661643
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.625 0.76190476 0.66666667 0.8 0.63636364 0.54545455
|
|
0.55 0.60869565 0.66666667 0.66666667]
|
|
|
|
mean value: 0.6527418595896857
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.55555556 0.88888889 0.44444444 0.63157895 0.73684211 0.63157895
|
|
0.57894737 0.73684211 0.73684211 0.55555556]
|
|
|
|
mean value: 0.6497076023391812
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.59027778 0.78819444 0.59722222 0.71578947 0.60175439 0.48245614
|
|
0.48947368 0.56842105 0.63508772 0.61111111]
|
|
|
|
mean value: 0.6079788011695907
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.41666667 0.69565217 0.36363636 0.54545455 0.51851852 0.4137931
|
|
0.39285714 0.5 0.53846154 0.43478261]
|
|
|
|
mean value: 0.4819822661651747
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.47
|
|
|
|
Accuracy on Blind test: 0.74
|
|
|
|
Model_name: Random Forest
|
|
Model func: RandomForestClassifier(n_estimators=1000, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(n_estimators=1000, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.64474487 1.55277252 1.56014156 1.55931854 1.54470897 1.57435083
|
|
1.5516119 1.55370855 1.54520106 1.55006909]
|
|
|
|
mean value: 1.5636627912521361
|
|
|
|
key: score_time
|
|
value: [0.09955883 0.09734106 0.09075069 0.14853644 0.09100676 0.09602427
|
|
0.09270549 0.09267473 0.09793282 0.09106135]
|
|
|
|
mean value: 0.09975924491882324
|
|
|
|
key: test_mcc
|
|
value: [0.58639547 0.66004445 0.47140452 0.52280702 0.60035727 0.54101198
|
|
0.4677202 0.52280702 0.17770466 0.51111111]
|
|
|
|
mean value: 0.5061363688588446
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.79411765 0.82352941 0.73529412 0.76470588 0.79411765 0.76470588
|
|
0.73529412 0.76470588 0.58823529 0.75757576]
|
|
|
|
mean value: 0.7522281639928698
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.81081081 0.85 0.74285714 0.78947368 0.8 0.81818182
|
|
0.75675676 0.78947368 0.61111111 0.77777778]
|
|
|
|
mean value: 0.774644278591647
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.78947368 0.77272727 0.76470588 0.78947368 0.875 0.72
|
|
0.77777778 0.78947368 0.64705882 0.77777778]
|
|
|
|
mean value: 0.770346858679676
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.83333333 0.94444444 0.72222222 0.78947368 0.73684211 0.94736842
|
|
0.73684211 0.78947368 0.57894737 0.77777778]
|
|
|
|
mean value: 0.7856725146198831
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.79166667 0.81597222 0.73611111 0.76140351 0.80175439 0.74035088
|
|
0.73508772 0.76140351 0.58947368 0.75555556]
|
|
|
|
mean value: 0.7488779239766082
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.68181818 0.73913043 0.59090909 0.65217391 0.66666667 0.69230769
|
|
0.60869565 0.65217391 0.44 0.63636364]
|
|
|
|
mean value: 0.6360239181108747
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.59
|
|
|
|
Accuracy on Blind test: 0.8
|
|
|
|
Model_name: Random Forest2
|
|
Model func: RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'Z...05', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10,
|
|
oob_score=True, random_state=42))])
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
|
|
key: fit_time
|
|
value: [1.79037571 0.94716096 1.01279712 1.02011991 1.05227804 0.92565942
|
|
0.96677542 0.93800211 0.95369363 0.88223243]
|
|
|
|
mean value: 1.0489094734191895
|
|
|
|
key: score_time
|
|
value: [0.26673937 0.26944971 0.16845083 0.17244506 0.20465398 0.179075
|
|
0.2911427 0.24270773 0.23012209 0.2041471 ]
|
|
|
|
mean value: 0.2228933572769165
|
|
|
|
key: test_mcc
|
|
value: [0.58639547 0.66004445 0.47140452 0.51983348 0.60035727 0.65171146
|
|
0.40350877 0.64210526 0.24433145 0.51111111]
|
|
|
|
mean value: 0.5290803241601866
|
|
|
|
key: train_mcc
|
|
value: [0.8749177 0.86799921 0.86763734 0.88797153 0.88764164 0.88209417
|
|
0.89417253 0.88797153 0.895442 0.86272864]
|
|
|
|
mean value: 0.8808576302233753
|
|
|
|
key: test_accuracy
|
|
value: [0.79411765 0.82352941 0.73529412 0.76470588 0.79411765 0.82352941
|
|
0.70588235 0.82352941 0.61764706 0.75757576]
|
|
|
|
mean value: 0.7639928698752227
|
|
|
|
key: train_accuracy
|
|
value: [0.93770492 0.93442623 0.93442623 0.9442623 0.9442623 0.94098361
|
|
0.94754098 0.9442623 0.94754098 0.93137255]
|
|
|
|
mean value: 0.9406782385085182
|
|
|
|
key: test_fscore
|
|
value: [0.81081081 0.85 0.74285714 0.8 0.8 0.85714286
|
|
0.73684211 0.84210526 0.62857143 0.77777778]
|
|
|
|
mean value: 0.7846107385581069
|
|
|
|
key: train_fscore
|
|
value: [0.94492754 0.94186047 0.94152047 0.95014663 0.94985251 0.94767442
|
|
0.95266272 0.95014663 0.95348837 0.93948127]
|
|
|
|
mean value: 0.9471761012293709
|
|
|
|
key: test_precision
|
|
value: [0.78947368 0.77272727 0.76470588 0.76190476 0.875 0.7826087
|
|
0.73684211 0.84210526 0.6875 0.77777778]
|
|
|
|
mean value: 0.7790645443046507
|
|
|
|
key: train_precision
|
|
value: [0.92090395 0.92045455 0.92528736 0.93103448 0.93604651 0.92090395
|
|
0.94152047 0.93103448 0.92655367 0.91061453]
|
|
|
|
mean value: 0.9264353953818358
|
|
|
|
key: test_recall
|
|
value: [0.83333333 0.94444444 0.72222222 0.84210526 0.73684211 0.94736842
|
|
0.73684211 0.84210526 0.57894737 0.77777778]
|
|
|
|
mean value: 0.7961988304093567
|
|
|
|
key: train_recall
|
|
value: [0.9702381 0.96428571 0.95833333 0.97005988 0.96407186 0.9760479
|
|
0.96407186 0.97005988 0.98203593 0.9702381 ]
|
|
|
|
mean value: 0.968944254348446
|
|
|
|
key: test_roc_auc
|
|
value: [0.79166667 0.81597222 0.73611111 0.75438596 0.80175439 0.80701754
|
|
0.70175439 0.82105263 0.62280702 0.75555556]
|
|
|
|
mean value: 0.7608077485380118
|
|
|
|
key: train_roc_auc
|
|
value: [0.93402416 0.93104797 0.93172141 0.94155168 0.94218086 0.93729931
|
|
0.94580404 0.94155168 0.94391651 0.92714803]
|
|
|
|
mean value: 0.9376245655535734
|
|
|
|
key: test_jcc
|
|
value: [0.68181818 0.73913043 0.59090909 0.66666667 0.66666667 0.75
|
|
0.58333333 0.72727273 0.45833333 0.63636364]
|
|
|
|
mean value: 0.6500494071146244
|
|
|
|
key: train_jcc
|
|
value: [0.8956044 0.89010989 0.88950276 0.90502793 0.90449438 0.90055249
|
|
0.90960452 0.90502793 0.91111111 0.88586957]
|
|
|
|
mean value: 0.8996904978379844
|
|
|
|
MCC on Blind test: 0.62
|
|
|
|
Accuracy on Blind test: 0.81
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01094341 0.01144147 0.00983262 0.00971198 0.01084304 0.00970316
|
|
0.01028466 0.01058125 0.00986981 0.00982785]
|
|
|
|
mean value: 0.010303926467895509
|
|
|
|
key: score_time
|
|
value: [0.00899744 0.00984764 0.00904608 0.00897098 0.00900269 0.00904703
|
|
0.00899124 0.00921154 0.00883937 0.00896144]
|
|
|
|
mean value: 0.009091544151306152
|
|
|
|
key: test_mcc
|
|
value: [0.53472222 0.29012943 0.47075654 0.39794149 0.4677202 0.39794149
|
|
0.07368421 0.16491228 0.46019501 0.38729833]
|
|
|
|
mean value: 0.36453012120102724
|
|
|
|
key: train_mcc
|
|
value: [0.44063855 0.50166137 0.48731932 0.46122141 0.47471905 0.45496383
|
|
0.50820014 0.48807757 0.48823787 0.46926792]
|
|
|
|
mean value: 0.47743070304044977
|
|
|
|
key: test_accuracy
|
|
value: [0.76470588 0.64705882 0.73529412 0.70588235 0.73529412 0.70588235
|
|
0.52941176 0.58823529 0.73529412 0.6969697 ]
|
|
|
|
mean value: 0.6844028520499109
|
|
|
|
key: train_accuracy
|
|
value: [0.72459016 0.75409836 0.74754098 0.73442623 0.74098361 0.73114754
|
|
0.75737705 0.74754098 0.74754098 0.73856209]
|
|
|
|
mean value: 0.7423807993142613
|
|
|
|
key: test_fscore
|
|
value: [0.76470588 0.7 0.76923077 0.75 0.75675676 0.75
|
|
0.52941176 0.63157895 0.76923077 0.75 ]
|
|
|
|
mean value: 0.717091488964554
|
|
|
|
key: train_fscore
|
|
value: [0.75722543 0.78005865 0.77809798 0.76790831 0.77233429 0.76162791
|
|
0.78735632 0.77936963 0.77681159 0.77142857]
|
|
|
|
mean value: 0.773221869261951
|
|
|
|
key: test_precision
|
|
value: [0.8125 0.63636364 0.71428571 0.71428571 0.77777778 0.71428571
|
|
0.6 0.63157895 0.75 0.68181818]
|
|
|
|
mean value: 0.703289568618516
|
|
|
|
key: train_precision
|
|
value: [0.73595506 0.76878613 0.75418994 0.73626374 0.74444444 0.74011299
|
|
0.75690608 0.74725275 0.75280899 0.74175824]
|
|
|
|
mean value: 0.7478478357663048
|
|
|
|
key: test_recall
|
|
value: [0.72222222 0.77777778 0.83333333 0.78947368 0.73684211 0.78947368
|
|
0.47368421 0.63157895 0.78947368 0.83333333]
|
|
|
|
mean value: 0.737719298245614
|
|
|
|
key: train_recall
|
|
value: [0.7797619 0.79166667 0.80357143 0.80239521 0.80239521 0.78443114
|
|
0.82035928 0.81437126 0.80239521 0.80357143]
|
|
|
|
mean value: 0.800491873396065
|
|
|
|
key: test_roc_auc
|
|
value: [0.76736111 0.63888889 0.72916667 0.69473684 0.73508772 0.69473684
|
|
0.53684211 0.58245614 0.72807018 0.68333333]
|
|
|
|
mean value: 0.6790679824561404
|
|
|
|
key: train_roc_auc
|
|
value: [0.71834811 0.74984793 0.74120177 0.72728456 0.73453094 0.7255489
|
|
0.75075935 0.74051896 0.74177731 0.73149586]
|
|
|
|
mean value: 0.7361313698938433
|
|
|
|
key: test_jcc
|
|
value: [0.61904762 0.53846154 0.625 0.6 0.60869565 0.6
|
|
0.36 0.46153846 0.625 0.6 ]
|
|
|
|
mean value: 0.5637743271221533
|
|
|
|
key: train_jcc
|
|
value: [0.60930233 0.63942308 0.63679245 0.62325581 0.62910798 0.61502347
|
|
0.6492891 0.63849765 0.63507109 0.62790698]
|
|
|
|
mean value: 0.6303669943587016
|
|
|
|
MCC on Blind test: 0.4
|
|
|
|
Accuracy on Blind test: 0.71
|
|
|
|
Model_name: XGBoost
|
|
Model func: XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'Z...
|
|
interaction_constraints=None, learning_rate=None,
|
|
max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan,
|
|
monotone_constraints=None, n_estimators=100,
|
|
n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None,
|
|
reg_lambda=None, scale_pos_weight=None,
|
|
subsample=None, tree_method=None,
|
|
use_label_encoder=False,
|
|
validate_parameters=None, verbosity=0))])
|
|
|
|
key: fit_time
|
|
value: [0.12188196 0.07556415 0.07521582 0.07375193 0.07369685 0.0758543
|
|
0.0753386 0.08109188 0.0737834 0.07879972]
|
|
|
|
mean value: 0.0804978609085083
|
|
|
|
key: score_time
|
|
value: [0.01106882 0.01126862 0.01102591 0.01076293 0.01078033 0.01100802
|
|
0.01146555 0.01110268 0.01160359 0.01099086]
|
|
|
|
mean value: 0.011107730865478515
|
|
|
|
key: test_mcc
|
|
value: [0.76388889 0.70507075 0.53472222 0.58330485 0.65158377 0.7009124
|
|
0.28421053 0.65158377 0.4677202 0.44777366]
|
|
|
|
mean value: 0.5790771029050447
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.88235294 0.85294118 0.76470588 0.79411765 0.82352941 0.85294118
|
|
0.64705882 0.82352941 0.73529412 0.72727273]
|
|
|
|
mean value: 0.7903743315508022
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.88888889 0.86486486 0.76470588 0.82926829 0.83333333 0.87179487
|
|
0.68421053 0.83333333 0.75675676 0.75675676]
|
|
|
|
mean value: 0.8083913507080462
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.88888889 0.84210526 0.8125 0.77272727 0.88235294 0.85
|
|
0.68421053 0.88235294 0.77777778 0.73684211]
|
|
|
|
mean value: 0.8129757716483722
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.88888889 0.88888889 0.72222222 0.89473684 0.78947368 0.89473684
|
|
0.68421053 0.78947368 0.73684211 0.77777778]
|
|
|
|
mean value: 0.8067251461988304
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.88194444 0.85069444 0.76736111 0.78070175 0.82807018 0.84736842
|
|
0.64210526 0.82807018 0.73508772 0.72222222]
|
|
|
|
mean value: 0.7883625730994153
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.8 0.76190476 0.61904762 0.70833333 0.71428571 0.77272727
|
|
0.52 0.71428571 0.60869565 0.60869565]
|
|
|
|
mean value: 0.6827975719932242
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.76
|
|
|
|
Accuracy on Blind test: 0.88
|
|
|
|
Model_name: LDA
|
|
Model func: LinearDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LinearDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.04658723 0.04880452 0.05894375 0.05892873 0.06675482 0.0693748
|
|
0.06614852 0.06622481 0.06900764 0.05905008]
|
|
|
|
mean value: 0.060982489585876466
|
|
|
|
key: score_time
|
|
value: [0.01220036 0.03400898 0.02189231 0.02129936 0.02581954 0.01575804
|
|
0.02100849 0.02393556 0.02214861 0.02210212]
|
|
|
|
mean value: 0.02201733589172363
|
|
|
|
key: test_mcc
|
|
value: [0.48168199 0.4677202 0.54935027 0.36300672 0.34904492 0.39794149
|
|
0.41464421 0.43157895 0.20536002 0.33210558]
|
|
|
|
mean value: 0.39924343631182585
|
|
|
|
key: train_mcc
|
|
value: [0.81426335 0.79434117 0.79448967 0.81466352 0.81466352 0.79512604
|
|
0.84108267 0.78137376 0.81466352 0.79519112]
|
|
|
|
mean value: 0.8059858342351645
|
|
|
|
key: test_accuracy
|
|
value: [0.73529412 0.73529412 0.76470588 0.67647059 0.67647059 0.70588235
|
|
0.70588235 0.70588235 0.61764706 0.66666667]
|
|
|
|
mean value: 0.6990196078431373
|
|
|
|
key: train_accuracy
|
|
value: [0.90819672 0.89836066 0.89836066 0.90819672 0.90819672 0.89836066
|
|
0.92131148 0.89180328 0.90819672 0.89869281]
|
|
|
|
mean value: 0.9039676417014894
|
|
|
|
key: test_fscore
|
|
value: [0.72727273 0.75675676 0.75 0.68571429 0.7027027 0.75
|
|
0.72222222 0.70588235 0.71111111 0.68571429]
|
|
|
|
mean value: 0.7197376444435268
|
|
|
|
key: train_fscore
|
|
value: [0.91764706 0.90855457 0.90962099 0.91764706 0.91764706 0.90962099
|
|
0.92899408 0.90265487 0.91764706 0.90909091]
|
|
|
|
mean value: 0.9139124649260575
|
|
|
|
key: test_precision
|
|
value: [0.8 0.73684211 0.85714286 0.75 0.72222222 0.71428571
|
|
0.76470588 0.8 0.61538462 0.70588235]
|
|
|
|
mean value: 0.7466465749592684
|
|
|
|
key: train_precision
|
|
value: [0.90697674 0.9005848 0.89142857 0.9017341 0.9017341 0.88636364
|
|
0.91812865 0.88953488 0.9017341 0.89595376]
|
|
|
|
mean value: 0.8994173355355743
|
|
|
|
key: test_recall
|
|
value: [0.66666667 0.77777778 0.66666667 0.63157895 0.68421053 0.78947368
|
|
0.68421053 0.63157895 0.84210526 0.66666667]
|
|
|
|
mean value: 0.704093567251462
|
|
|
|
key: train_recall
|
|
value: [0.92857143 0.91666667 0.92857143 0.93413174 0.93413174 0.93413174
|
|
0.94011976 0.91616766 0.93413174 0.92261905]
|
|
|
|
mean value: 0.9289242942686057
|
|
|
|
key: test_roc_auc
|
|
value: [0.73958333 0.73263889 0.77083333 0.68245614 0.6754386 0.69473684
|
|
0.70877193 0.71578947 0.5877193 0.66666667]
|
|
|
|
mean value: 0.6974634502923976
|
|
|
|
key: train_roc_auc
|
|
value: [0.90589155 0.89628954 0.89494265 0.90547167 0.90547167 0.8946021
|
|
0.91933524 0.88924325 0.90547167 0.89609213]
|
|
|
|
mean value: 0.9012811463937972
|
|
|
|
key: test_jcc
|
|
value: [0.57142857 0.60869565 0.6 0.52173913 0.54166667 0.6
|
|
0.56521739 0.54545455 0.55172414 0.52173913]
|
|
|
|
mean value: 0.5627665225828644
|
|
|
|
key: train_jcc
|
|
value: [0.84782609 0.83243243 0.8342246 0.84782609 0.84782609 0.8342246
|
|
0.86740331 0.82258065 0.84782609 0.83333333]
|
|
|
|
mean value: 0.8415503271531233
|
|
|
|
MCC on Blind test: 0.43
|
|
|
|
Accuracy on Blind test: 0.72
|
|
|
|
Model_name: Multinomial
|
|
Model func: MultinomialNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MultinomialNB())])
|
|
|
|
key: fit_time
|
|
value: [0.02421403 0.009835 0.0093627 0.00928998 0.0093646 0.00928926
|
|
0.00923896 0.01016259 0.00957632 0.00947571]
|
|
|
|
mean value: 0.010980916023254395
|
|
|
|
key: score_time
|
|
value: [0.01043844 0.00904918 0.00872874 0.00870085 0.00876427 0.00863504
|
|
0.00874949 0.00907063 0.00901103 0.00872445]
|
|
|
|
mean value: 0.008987212181091308
|
|
|
|
key: test_mcc
|
|
value: [0.40849122 0.22968635 0.52777778 0.51983348 0.58055371 0.51983348
|
|
0.03226553 0.04561404 0.33540029 0.19551858]
|
|
|
|
mean value: 0.33949744481723154
|
|
|
|
key: train_mcc
|
|
value: [0.39948089 0.40611709 0.41270124 0.40774897 0.40724179 0.4072543
|
|
0.40750911 0.42758624 0.41403163 0.44232658]
|
|
|
|
mean value: 0.4131997829502527
|
|
|
|
key: test_accuracy
|
|
value: [0.70588235 0.61764706 0.76470588 0.76470588 0.79411765 0.76470588
|
|
0.52941176 0.52941176 0.67647059 0.60606061]
|
|
|
|
mean value: 0.6753119429590018
|
|
|
|
key: train_accuracy
|
|
value: [0.70491803 0.70819672 0.71147541 0.70819672 0.70819672 0.70819672
|
|
0.70819672 0.71803279 0.71147541 0.7254902 ]
|
|
|
|
mean value: 0.711237544198007
|
|
|
|
key: test_fscore
|
|
value: [0.73684211 0.68292683 0.77777778 0.8 0.82051282 0.8
|
|
0.6 0.57894737 0.73170732 0.66666667]
|
|
|
|
mean value: 0.7195380884982939
|
|
|
|
key: train_fscore
|
|
value: [0.74431818 0.74787535 0.75418994 0.74351585 0.74929577 0.74787535
|
|
0.74498567 0.75428571 0.75 0.76271186]
|
|
|
|
mean value: 0.7499053711004466
|
|
|
|
key: test_precision
|
|
value: [0.7 0.60869565 0.77777778 0.76190476 0.8 0.76190476
|
|
0.57142857 0.57894737 0.68181818 0.61904762]
|
|
|
|
mean value: 0.686152469447664
|
|
|
|
key: train_precision
|
|
value: [0.71195652 0.71351351 0.71052632 0.71666667 0.70744681 0.70967742
|
|
0.71428571 0.72131148 0.71351351 0.72580645]
|
|
|
|
mean value: 0.7144704400396229
|
|
|
|
key: test_recall
|
|
value: [0.77777778 0.77777778 0.77777778 0.84210526 0.84210526 0.84210526
|
|
0.63157895 0.57894737 0.78947368 0.72222222]
|
|
|
|
mean value: 0.758187134502924
|
|
|
|
key: train_recall
|
|
value: [0.7797619 0.78571429 0.80357143 0.77245509 0.79640719 0.79041916
|
|
0.77844311 0.79041916 0.79041916 0.80357143]
|
|
|
|
mean value: 0.7891181921870545
|
|
|
|
key: test_roc_auc
|
|
value: [0.70138889 0.60763889 0.76388889 0.75438596 0.7877193 0.75438596
|
|
0.51578947 0.52280702 0.66140351 0.59444444]
|
|
|
|
mean value: 0.6663852339181286
|
|
|
|
key: train_roc_auc
|
|
value: [0.6964503 0.69942649 0.70105579 0.70144494 0.69892823 0.69955741
|
|
0.70081576 0.71042697 0.7031806 0.71700311]
|
|
|
|
mean value: 0.7028289575131823
|
|
|
|
key: test_jcc
|
|
value: [0.58333333 0.51851852 0.63636364 0.66666667 0.69565217 0.66666667
|
|
0.42857143 0.40740741 0.57692308 0.5 ]
|
|
|
|
mean value: 0.5680102908363778
|
|
|
|
key: train_jcc
|
|
value: [0.59276018 0.59728507 0.60538117 0.59174312 0.5990991 0.59728507
|
|
0.59360731 0.60550459 0.6 0.61643836]
|
|
|
|
mean value: 0.5999103950282938
|
|
|
|
MCC on Blind test: 0.35
|
|
|
|
Accuracy on Blind test: 0.68
|
|
|
|
Model_name: Passive Aggresive
|
|
Model func: PassiveAggressiveClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
PassiveAggressiveClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.0122931 0.01547909 0.01465154 0.01761985 0.01728725 0.01541042
|
|
0.01495695 0.01636028 0.01920295 0.01726055]
|
|
|
|
mean value: 0.01605219841003418
|
|
|
|
key: score_time
|
|
value: [0.00867844 0.01110268 0.01112914 0.01157689 0.01164865 0.01181674
|
|
0.01177382 0.01165366 0.01198506 0.0117166 ]
|
|
|
|
mean value: 0.0113081693649292
|
|
|
|
key: test_mcc
|
|
value: [0.60035727 0.6846532 0.48168199 0.4083134 0.39847501 0.41096093
|
|
0.28006577 0.36560566 0.14127455 0.36232865]
|
|
|
|
mean value: 0.41337164278426364
|
|
|
|
key: train_mcc
|
|
value: [0.64320473 0.71463947 0.66311695 0.54037119 0.58619601 0.52292634
|
|
0.60152173 0.47541486 0.54439142 0.68531761]
|
|
|
|
mean value: 0.597710030252377
|
|
|
|
key: test_accuracy
|
|
value: [0.79411765 0.82352941 0.73529412 0.67647059 0.70588235 0.67647059
|
|
0.64705882 0.67647059 0.58823529 0.66666667]
|
|
|
|
mean value: 0.6990196078431372
|
|
|
|
key: train_accuracy
|
|
value: [0.81311475 0.85901639 0.83278689 0.72131148 0.77377049 0.73770492
|
|
0.79016393 0.70491803 0.7442623 0.83660131]
|
|
|
|
mean value: 0.7813650487517412
|
|
|
|
key: test_fscore
|
|
value: [0.78787879 0.85714286 0.72727273 0.64516129 0.76190476 0.7755102
|
|
0.73913043 0.76595745 0.72 0.64516129]
|
|
|
|
mean value: 0.7425119800517047
|
|
|
|
key: train_fscore
|
|
value: [0.81311475 0.87536232 0.85633803 0.66403162 0.82706767 0.80487805
|
|
0.83419689 0.78773585 0.81067961 0.83870968]
|
|
|
|
mean value: 0.8112114468932888
|
|
|
|
key: test_precision
|
|
value: [0.86666667 0.75 0.8 0.83333333 0.69565217 0.63333333
|
|
0.62962963 0.64285714 0.58064516 0.76923077]
|
|
|
|
mean value: 0.7201348210254241
|
|
|
|
key: train_precision
|
|
value: [0.90510949 0.85310734 0.81283422 0.97674419 0.7112069 0.67901235
|
|
0.73515982 0.64980545 0.68163265 0.91549296]
|
|
|
|
mean value: 0.7920105362190161
|
|
|
|
key: test_recall
|
|
value: [0.72222222 1. 0.66666667 0.52631579 0.84210526 1.
|
|
0.89473684 0.94736842 0.94736842 0.55555556]
|
|
|
|
mean value: 0.810233918128655
|
|
|
|
key: train_recall
|
|
value: [0.73809524 0.89880952 0.9047619 0.50299401 0.98802395 0.98802395
|
|
0.96407186 1. 1. 0.77380952]
|
|
|
|
mean value: 0.875858996293128
|
|
|
|
key: test_roc_auc
|
|
value: [0.79861111 0.8125 0.73958333 0.69649123 0.6877193 0.63333333
|
|
0.61403509 0.64035088 0.54035088 0.67777778]
|
|
|
|
mean value: 0.6840752923976607
|
|
|
|
key: train_roc_auc
|
|
value: [0.82160236 0.85451425 0.82464373 0.74425063 0.75125835 0.71140328
|
|
0.771891 0.67391304 0.7173913 0.8434265 ]
|
|
|
|
mean value: 0.7714294452533026
|
|
|
|
key: test_jcc
|
|
value: [0.65 0.75 0.57142857 0.47619048 0.61538462 0.63333333
|
|
0.5862069 0.62068966 0.5625 0.47619048]
|
|
|
|
mean value: 0.594192402425161
|
|
|
|
key: train_jcc
|
|
value: [0.68508287 0.77835052 0.74876847 0.49704142 0.70512821 0.67346939
|
|
0.71555556 0.64980545 0.68163265 0.72222222]
|
|
|
|
mean value: 0.6857056752609968
|
|
|
|
MCC on Blind test: 0.54
|
|
|
|
Accuracy on Blind test: 0.76
|
|
|
|
Model_name: Stochastic GDescent
|
|
Model func: SGDClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SGDClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01870847 0.01653576 0.01690269 0.02013874 0.01762629 0.01741481
|
|
0.01960111 0.01709795 0.01668668 0.01803088]
|
|
|
|
mean value: 0.01787433624267578
|
|
|
|
key: score_time
|
|
value: [0.01167178 0.01161695 0.01179361 0.01316237 0.01197505 0.01161027
|
|
0.01172209 0.0116086 0.01169157 0.01176 ]
|
|
|
|
mean value: 0.011861228942871093
|
|
|
|
key: test_mcc
|
|
value: [0.58925565 0.60755744 0.73351447 0.52280702 0.53311399 0.41096093
|
|
0.18353259 0.50267455 0.22710999 0.33210558]
|
|
|
|
mean value: 0.46426322123392844
|
|
|
|
key: train_mcc
|
|
value: [0.71529693 0.68305345 0.51484487 0.74854639 0.69193477 0.54032105
|
|
0.65028872 0.7368841 0.72297421 0.67986423]
|
|
|
|
mean value: 0.6684008696544169
|
|
|
|
key: test_accuracy
|
|
value: [0.79411765 0.79411765 0.85294118 0.76470588 0.76470588 0.67647059
|
|
0.55882353 0.73529412 0.61764706 0.66666667]
|
|
|
|
mean value: 0.7225490196078431
|
|
|
|
key: train_accuracy
|
|
value: [0.85901639 0.84262295 0.73442623 0.87540984 0.83934426 0.74754098
|
|
0.81311475 0.86885246 0.84918033 0.83986928]
|
|
|
|
mean value: 0.8269377477767063
|
|
|
|
key: test_fscore
|
|
value: [0.8 0.82926829 0.87804878 0.78947368 0.77777778 0.7755102
|
|
0.48275862 0.72727273 0.73469388 0.68571429]
|
|
|
|
mean value: 0.7480518250468358
|
|
|
|
key: train_fscore
|
|
value: [0.87164179 0.85542169 0.80387409 0.88622754 0.84039088 0.81081081
|
|
0.80808081 0.87804878 0.87894737 0.86350975]
|
|
|
|
mean value: 0.8496953511294554
|
|
|
|
key: test_precision
|
|
value: [0.82352941 0.73913043 0.7826087 0.78947368 0.82352941 0.63333333
|
|
0.7 0.85714286 0.6 0.70588235]
|
|
|
|
mean value: 0.7454630181592088
|
|
|
|
key: train_precision
|
|
value: [0.8742515 0.86585366 0.67755102 0.88622754 0.92142857 0.6875
|
|
0.92307692 0.89440994 0.78403756 0.81151832]
|
|
|
|
mean value: 0.8325855036547385
|
|
|
|
key: test_recall
|
|
value: [0.77777778 0.94444444 1. 0.78947368 0.73684211 1.
|
|
0.36842105 0.63157895 0.94736842 0.66666667]
|
|
|
|
mean value: 0.7862573099415204
|
|
|
|
key: train_recall
|
|
value: [0.86904762 0.8452381 0.98809524 0.88622754 0.77245509 0.98802395
|
|
0.71856287 0.86227545 1. 0.92261905]
|
|
|
|
mean value: 0.885254491017964
|
|
|
|
key: test_roc_auc
|
|
value: [0.79513889 0.78472222 0.84375 0.76140351 0.76842105 0.63333333
|
|
0.58421053 0.74912281 0.57368421 0.66666667]
|
|
|
|
mean value: 0.716045321637427
|
|
|
|
key: train_roc_auc
|
|
value: [0.85788147 0.84232708 0.70572645 0.87427319 0.84637247 0.72227285
|
|
0.82304955 0.86954352 0.83333333 0.83087474]
|
|
|
|
mean value: 0.8205654661796686
|
|
|
|
key: test_jcc
|
|
value: [0.66666667 0.70833333 0.7826087 0.65217391 0.63636364 0.63333333
|
|
0.31818182 0.57142857 0.58064516 0.52173913]
|
|
|
|
mean value: 0.6071474259728117
|
|
|
|
key: train_jcc
|
|
value: [0.77248677 0.74736842 0.67206478 0.79569892 0.7247191 0.68181818
|
|
0.6779661 0.7826087 0.78403756 0.75980392]
|
|
|
|
mean value: 0.7398572456141462
|
|
|
|
MCC on Blind test: 0.54
|
|
|
|
Accuracy on Blind test: 0.75
|
|
|
|
Model_name: AdaBoost Classifier
|
|
Model func: AdaBoostClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', AdaBoostClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.15457821 0.13802409 0.14435077 0.16323423 0.1404407 0.13826203
|
|
0.13767624 0.13883495 0.13875985 0.14308405]
|
|
|
|
mean value: 0.14372451305389405
|
|
|
|
key: score_time
|
|
value: [0.01583242 0.01523876 0.01577759 0.01523495 0.01944852 0.01516557
|
|
0.01577592 0.01571465 0.01683474 0.01517773]
|
|
|
|
mean value: 0.0160200834274292
|
|
|
|
key: test_mcc
|
|
value: [0.70710678 0.73351447 0.57300562 0.64172547 0.64210526 0.64210526
|
|
0.21947762 0.58639547 0.39794149 0.32463591]
|
|
|
|
mean value: 0.5468013357767797
|
|
|
|
key: train_mcc
|
|
value: [0.93373459 0.95371452 0.9668698 0.96734255 0.94726239 0.98030477
|
|
0.96695595 0.94051862 0.96691436 0.95385496]
|
|
|
|
mean value: 0.9577472508488036
|
|
|
|
key: test_accuracy
|
|
value: [0.85294118 0.85294118 0.76470588 0.82352941 0.82352941 0.82352941
|
|
0.61764706 0.79411765 0.70588235 0.66666667]
|
|
|
|
mean value: 0.7725490196078431
|
|
|
|
key: train_accuracy
|
|
value: [0.96721311 0.97704918 0.98360656 0.98360656 0.97377049 0.99016393
|
|
0.98360656 0.9704918 0.98360656 0.97712418]
|
|
|
|
mean value: 0.9790238937104896
|
|
|
|
key: test_fscore
|
|
value: [0.85714286 0.87804878 0.73333333 0.85 0.84210526 0.84210526
|
|
0.66666667 0.81081081 0.75 0.7027027 ]
|
|
|
|
mean value: 0.7932915677459965
|
|
|
|
key: train_fscore
|
|
value: [0.9704142 0.97935103 0.9851632 0.98525074 0.97633136 0.99109792
|
|
0.98498498 0.97329377 0.98507463 0.97910448]
|
|
|
|
mean value: 0.9810066317646712
|
|
|
|
key: test_precision
|
|
value: [0.88235294 0.7826087 0.91666667 0.80952381 0.84210526 0.84210526
|
|
0.65 0.83333333 0.71428571 0.68421053]
|
|
|
|
mean value: 0.7957192213269747
|
|
|
|
key: train_precision
|
|
value: [0.96470588 0.97076023 0.98224852 0.97093023 0.96491228 0.98235294
|
|
0.98795181 0.96470588 0.98214286 0.98203593]
|
|
|
|
mean value: 0.975274656628592
|
|
|
|
key: test_recall
|
|
value: [0.83333333 1. 0.61111111 0.89473684 0.84210526 0.84210526
|
|
0.68421053 0.78947368 0.78947368 0.72222222]
|
|
|
|
mean value: 0.8008771929824562
|
|
|
|
key: train_recall
|
|
value: [0.97619048 0.98809524 0.98809524 1. 0.98802395 1.
|
|
0.98203593 0.98203593 0.98802395 0.97619048]
|
|
|
|
mean value: 0.986869118905047
|
|
|
|
key: test_roc_auc
|
|
value: [0.85416667 0.84375 0.77430556 0.81403509 0.82105263 0.82105263
|
|
0.60877193 0.79473684 0.69473684 0.66111111]
|
|
|
|
mean value: 0.7687719298245614
|
|
|
|
key: train_roc_auc
|
|
value: [0.96619743 0.97579944 0.98309871 0.98188406 0.97227285 0.98913043
|
|
0.98377159 0.96927883 0.98314241 0.97722567]
|
|
|
|
mean value: 0.9781801428651968
|
|
|
|
key: test_jcc
|
|
value: [0.75 0.7826087 0.57894737 0.73913043 0.72727273 0.72727273
|
|
0.5 0.68181818 0.6 0.54166667]
|
|
|
|
mean value: 0.6628716801886139
|
|
|
|
key: train_jcc
|
|
value: [0.94252874 0.95953757 0.97076023 0.97093023 0.95375723 0.98235294
|
|
0.9704142 0.94797688 0.97058824 0.95906433]
|
|
|
|
mean value: 0.962791058354843
|
|
|
|
MCC on Blind test: 0.67
|
|
|
|
Accuracy on Blind test: 0.84
|
|
|
|
Model_name: Bagging Classifier
|
|
Model func: BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
BaggingClassifier(n_jobs=10, oob_score=True,
|
|
random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.06479907 0.05747223 0.05795527 0.07633901 0.05164313 0.05256438
|
|
0.04232883 0.05421734 0.06383824 0.07929087]
|
|
|
|
mean value: 0.06004483699798584
|
|
|
|
key: score_time
|
|
value: [0.01918554 0.02542424 0.02301121 0.01879787 0.02289248 0.01855135
|
|
0.0164628 0.02427959 0.01775217 0.03099537]
|
|
|
|
mean value: 0.021735262870788575
|
|
|
|
key: test_mcc
|
|
value: [0.77083333 0.52822141 0.58925565 0.39794149 0.52280702 0.64210526
|
|
0.29617444 0.65158377 0.29617444 0.38888889]
|
|
|
|
mean value: 0.5083985699651632
|
|
|
|
key: train_mcc
|
|
value: [0.98685259 0.94746683 0.95429519 0.92851788 0.98018231 0.9735312
|
|
0.9348084 0.9735312 0.98018231 0.96083904]
|
|
|
|
mean value: 0.962020697376727
|
|
|
|
key: test_accuracy
|
|
value: [0.88235294 0.76470588 0.79411765 0.70588235 0.76470588 0.82352941
|
|
0.64705882 0.82352941 0.64705882 0.6969697 ]
|
|
|
|
mean value: 0.7549910873440285
|
|
|
|
key: train_accuracy
|
|
value: [0.99344262 0.97377049 0.97704918 0.96393443 0.99016393 0.98688525
|
|
0.96721311 0.98688525 0.99016393 0.98039216]
|
|
|
|
mean value: 0.9809900353584057
|
|
|
|
key: test_fscore
|
|
value: [0.88235294 0.78947368 0.8 0.75 0.78947368 0.84210526
|
|
0.66666667 0.83333333 0.66666667 0.72222222]
|
|
|
|
mean value: 0.7742294461644307
|
|
|
|
key: train_fscore
|
|
value: [0.99401198 0.97590361 0.97885196 0.96636086 0.99099099 0.98802395
|
|
0.9695122 0.98802395 0.99099099 0.98192771]
|
|
|
|
mean value: 0.9824598202659995
|
|
|
|
key: test_precision
|
|
value: [0.9375 0.75 0.82352941 0.71428571 0.78947368 0.84210526
|
|
0.70588235 0.88235294 0.70588235 0.72222222]
|
|
|
|
mean value: 0.7873233942699887
|
|
|
|
key: train_precision
|
|
value: [1. 0.98780488 0.99386503 0.9875 0.9939759 0.98802395
|
|
0.98757764 0.98802395 0.9939759 0.99390244]
|
|
|
|
mean value: 0.9914649698920103
|
|
|
|
key: test_recall
|
|
value: [0.83333333 0.83333333 0.77777778 0.78947368 0.78947368 0.84210526
|
|
0.63157895 0.78947368 0.63157895 0.72222222]
|
|
|
|
mean value: 0.7640350877192983
|
|
|
|
key: train_recall
|
|
value: [0.98809524 0.96428571 0.96428571 0.94610778 0.98802395 0.98802395
|
|
0.95209581 0.98802395 0.98802395 0.9702381 ]
|
|
|
|
mean value: 0.9737204163102366
|
|
|
|
key: test_roc_auc
|
|
value: [0.88541667 0.76041667 0.79513889 0.69473684 0.76140351 0.82105263
|
|
0.64912281 0.82807018 0.64912281 0.69444444]
|
|
|
|
mean value: 0.7538925438596492
|
|
|
|
key: train_roc_auc
|
|
value: [0.99404762 0.97484359 0.97849322 0.96580752 0.99038879 0.9867656
|
|
0.96880153 0.9867656 0.99038879 0.98149586]
|
|
|
|
mean value: 0.9817798103977927
|
|
|
|
key: test_jcc
|
|
value: [0.78947368 0.65217391 0.66666667 0.6 0.65217391 0.72727273
|
|
0.5 0.71428571 0.5 0.56521739]
|
|
|
|
mean value: 0.6367264009826938
|
|
|
|
key: train_jcc
|
|
value: [0.98809524 0.95294118 0.95857988 0.93491124 0.98214286 0.97633136
|
|
0.9408284 0.97633136 0.98214286 0.96449704]
|
|
|
|
mean value: 0.9656801418792369
|
|
|
|
MCC on Blind test: 0.68
|
|
|
|
Accuracy on Blind test: 0.84
|
|
|
|
Model_name: Gaussian Process
|
|
Model func: GaussianProcessClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianProcessClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.04686761 0.04404998 0.09660649 0.07896829 0.0805831 0.10501623
|
|
0.10462236 0.0427084 0.06894422 0.06551003]
|
|
|
|
mean value: 0.07338767051696778
|
|
|
|
key: score_time
|
|
value: [0.01350307 0.01835465 0.02120543 0.02103281 0.02126193 0.02357173
|
|
0.01335812 0.01332927 0.02102995 0.01328373]
|
|
|
|
mean value: 0.017993068695068358
|
|
|
|
key: test_mcc
|
|
value: [0.22968635 0.22968635 0.35355339 0.15415752 0.2760495 0.20536002
|
|
0.09911893 0.39794149 0.33983632 0.25819889]
|
|
|
|
mean value: 0.2543588757876937
|
|
|
|
key: train_mcc
|
|
value: [0.98681736 0.98013067 0.98013067 0.98015739 0.98015739 0.9867656
|
|
0.99340041 0.98015739 0.98030477 0.98680124]
|
|
|
|
mean value: 0.9834822887732735
|
|
|
|
key: test_accuracy
|
|
value: [0.61764706 0.61764706 0.67647059 0.58823529 0.64705882 0.61764706
|
|
0.55882353 0.70588235 0.67647059 0.63636364]
|
|
|
|
mean value: 0.6342245989304813
|
|
|
|
key: train_accuracy
|
|
value: [0.99344262 0.99016393 0.99016393 0.99016393 0.99016393 0.99344262
|
|
0.99672131 0.99016393 0.99016393 0.99346405]
|
|
|
|
mean value: 0.9918054216222008
|
|
|
|
key: test_fscore
|
|
value: [0.68292683 0.68292683 0.68571429 0.65 0.7 0.71111111
|
|
0.61538462 0.75 0.71794872 0.7 ]
|
|
|
|
mean value: 0.6896012388695316
|
|
|
|
key: train_fscore
|
|
value: [0.99408284 0.99109792 0.99109792 0.99104478 0.99104478 0.99401198
|
|
0.99701493 0.99104478 0.99109792 0.99404762]
|
|
|
|
mean value: 0.9925585457609547
|
|
|
|
key: test_precision
|
|
value: [0.60869565 0.60869565 0.70588235 0.61904762 0.66666667 0.61538462
|
|
0.6 0.71428571 0.7 0.63636364]
|
|
|
|
mean value: 0.6475021909037254
|
|
|
|
key: train_precision
|
|
value: [0.98823529 0.98816568 0.98816568 0.98809524 0.98809524 0.99401198
|
|
0.99404762 0.98809524 0.98235294 0.99404762]
|
|
|
|
mean value: 0.989331252466972
|
|
|
|
key: test_recall
|
|
value: [0.77777778 0.77777778 0.66666667 0.68421053 0.73684211 0.84210526
|
|
0.63157895 0.78947368 0.73684211 0.77777778]
|
|
|
|
mean value: 0.7421052631578947
|
|
|
|
key: train_recall
|
|
value: [1. 0.99404762 0.99404762 0.99401198 0.99401198 0.99401198
|
|
1. 0.99401198 1. 0.99404762]
|
|
|
|
mean value: 0.9958190761334473
|
|
|
|
key: test_roc_auc
|
|
value: [0.60763889 0.60763889 0.67708333 0.5754386 0.63508772 0.5877193
|
|
0.54912281 0.69473684 0.66842105 0.62222222]
|
|
|
|
mean value: 0.6225109649122806
|
|
|
|
key: train_roc_auc
|
|
value: [0.99270073 0.98972454 0.98972454 0.98975961 0.98975961 0.9933828
|
|
0.99637681 0.98975961 0.98913043 0.99340062]
|
|
|
|
mean value: 0.9913719309578694
|
|
|
|
key: test_jcc
|
|
value: [0.51851852 0.51851852 0.52173913 0.48148148 0.53846154 0.55172414
|
|
0.44444444 0.6 0.56 0.53846154]
|
|
|
|
mean value: 0.5273349308251857
|
|
|
|
key: train_jcc
|
|
value: [0.98823529 0.98235294 0.98235294 0.98224852 0.98224852 0.98809524
|
|
0.99404762 0.98224852 0.98235294 0.98816568]
|
|
|
|
mean value: 0.9852348217393466
|
|
|
|
MCC on Blind test: 0.35
|
|
|
|
Accuracy on Blind test: 0.68
|
|
|
|
Model_name: Gradient Boosting
|
|
Model func: GradientBoostingClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GradientBoostingClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.52135944 0.50764012 0.53866386 0.51690722 0.51282549 0.506984
|
|
0.53687263 0.51025414 0.50481176 0.50744247]
|
|
|
|
mean value: 0.5163761138916015
|
|
|
|
key: score_time
|
|
value: [0.00953412 0.00952363 0.00984812 0.01052475 0.00969791 0.00929475
|
|
0.00978613 0.00974083 0.0093677 0.0093832 ]
|
|
|
|
mean value: 0.009670114517211914
|
|
|
|
key: test_mcc
|
|
value: [0.70710678 0.78334945 0.54935027 0.45935257 0.77005354 0.7009124
|
|
0.40350877 0.7009124 0.40350877 0.50952467]
|
|
|
|
mean value: 0.598757962439654
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.85294118 0.88235294 0.76470588 0.73529412 0.88235294 0.85294118
|
|
0.70588235 0.85294118 0.70588235 0.75757576]
|
|
|
|
mean value: 0.7992869875222817
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.85714286 0.9 0.75 0.7804878 0.88888889 0.87179487
|
|
0.73684211 0.87179487 0.73684211 0.78947368]
|
|
|
|
mean value: 0.8183267189236381
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.88235294 0.81818182 0.85714286 0.72727273 0.94117647 0.85
|
|
0.73684211 0.85 0.73684211 0.75 ]
|
|
|
|
mean value: 0.8149811024888424
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.83333333 1. 0.66666667 0.84210526 0.84210526 0.89473684
|
|
0.73684211 0.89473684 0.73684211 0.83333333]
|
|
|
|
mean value: 0.8280701754385965
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.85416667 0.875 0.77083333 0.72105263 0.8877193 0.84736842
|
|
0.70175439 0.84736842 0.70175439 0.75 ]
|
|
|
|
mean value: 0.795701754385965
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.75 0.81818182 0.6 0.64 0.8 0.77272727
|
|
0.58333333 0.77272727 0.58333333 0.65217391]
|
|
|
|
mean value: 0.6972476943346508
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.67
|
|
|
|
Accuracy on Blind test: 0.84
|
|
|
|
Model_name: QDA
|
|
Model func: QuadraticDiscriminantAnalysis()
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', QuadraticDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.03366113 0.02517581 0.0260148 0.0263443 0.02619052 0.02811694
|
|
0.02624726 0.02621651 0.02565145 0.05557752]
|
|
|
|
mean value: 0.02991962432861328
|
|
|
|
key: score_time
|
|
value: [0.0129447 0.01290584 0.01897812 0.01278806 0.01357579 0.0130229
|
|
0.01327062 0.01564002 0.01299119 0.02132869]
|
|
|
|
mean value: 0.01474459171295166
|
|
|
|
key: test_mcc
|
|
value: [ 0.29668305 0.03268602 0.48160041 0.46647866 0.20536002 -0.15942205
|
|
0.00394899 0.27248048 -0.01292611 0.06900656]
|
|
|
|
mean value: 0.16558960364641
|
|
|
|
key: train_mcc
|
|
value: [0.49620666 0.51940449 0.50784598 0.53306996 0.52169062 0.51024495
|
|
0.49872381 0.51024495 0.52738804 0.48177757]
|
|
|
|
mean value: 0.5106597032816975
|
|
|
|
key: test_accuracy
|
|
value: [0.64705882 0.52941176 0.73529412 0.73529412 0.61764706 0.47058824
|
|
0.52941176 0.64705882 0.52941176 0.54545455]
|
|
|
|
mean value: 0.598663101604278
|
|
|
|
key: train_accuracy
|
|
value: [0.71803279 0.73114754 0.72459016 0.73770492 0.73114754 0.72459016
|
|
0.71803279 0.72459016 0.73442623 0.70915033]
|
|
|
|
mean value: 0.7253412621879353
|
|
|
|
key: test_fscore
|
|
value: [0.71428571 0.63636364 0.7804878 0.79069767 0.71111111 0.60869565
|
|
0.63636364 0.72727273 0.65217391 0.61538462]
|
|
|
|
mean value: 0.6872836485295486
|
|
|
|
key: train_fscore
|
|
value: [0.79620853 0.80382775 0.8 0.80676329 0.80288462 0.79904306
|
|
0.7952381 0.79904306 0.80481928 0.79058824]
|
|
|
|
mean value: 0.799841591445319
|
|
|
|
key: test_precision
|
|
value: [0.625 0.53846154 0.69565217 0.70833333 0.61538462 0.51851852
|
|
0.56 0.64 0.55555556 0.57142857]
|
|
|
|
mean value: 0.6028334306595177
|
|
|
|
key: train_precision
|
|
value: [0.66141732 0.672 0.66666667 0.67611336 0.67068273 0.66533865
|
|
0.66007905 0.66533865 0.6733871 0.6536965 ]
|
|
|
|
mean value: 0.6664720017797614
|
|
|
|
key: test_recall
|
|
value: [0.83333333 0.77777778 0.88888889 0.89473684 0.84210526 0.73684211
|
|
0.73684211 0.84210526 0.78947368 0.66666667]
|
|
|
|
mean value: 0.8008771929824561
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.63541667 0.51388889 0.72569444 0.71403509 0.5877193 0.43508772
|
|
0.50175439 0.62105263 0.49473684 0.53333333]
|
|
|
|
mean value: 0.5762719298245614
|
|
|
|
key: train_roc_auc
|
|
value: [0.68613139 0.70072993 0.69343066 0.71014493 0.70289855 0.69565217
|
|
0.6884058 0.69565217 0.70652174 0.67753623]
|
|
|
|
mean value: 0.6957103565005818
|
|
|
|
key: test_jcc
|
|
value: [0.55555556 0.46666667 0.64 0.65384615 0.55172414 0.4375
|
|
0.46666667 0.57142857 0.48387097 0.44444444]
|
|
|
|
mean value: 0.5271703164281029
|
|
|
|
key: train_jcc
|
|
value: [0.66141732 0.672 0.66666667 0.67611336 0.67068273 0.66533865
|
|
0.66007905 0.66533865 0.6733871 0.6536965 ]
|
|
|
|
mean value: 0.6664720017797614
|
|
|
|
MCC on Blind test: 0.27
|
|
|
|
Accuracy on Blind test: 0.64
|
|
|
|
Model_name: Ridge Classifier
|
|
Model func: RidgeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.03426719 0.04003882 0.03874254 0.03056002 0.03333044 0.03114414
|
|
0.0315578 0.03347445 0.03128171 0.03115177]
|
|
|
|
mean value: 0.033554887771606444
|
|
|
|
key: score_time
|
|
value: [0.02486038 0.02450061 0.02502465 0.02458668 0.02432179 0.0207696
|
|
0.02489066 0.02314758 0.02173805 0.02499294]
|
|
|
|
mean value: 0.023883295059204102
|
|
|
|
key: test_mcc
|
|
value: [0.52777778 0.6846532 0.54935027 0.51983348 0.58055371 0.45935257
|
|
0.29617444 0.48168199 0.13356983 0.52029875]
|
|
|
|
mean value: 0.47532460055847936
|
|
|
|
key: train_mcc
|
|
value: [0.75505307 0.74780266 0.74817804 0.76845863 0.71495634 0.75489797
|
|
0.81466352 0.77531159 0.77477818 0.76226043]
|
|
|
|
mean value: 0.7616360420644603
|
|
|
|
key: test_accuracy
|
|
value: [0.76470588 0.82352941 0.76470588 0.76470588 0.79411765 0.73529412
|
|
0.64705882 0.73529412 0.58823529 0.75757576]
|
|
|
|
mean value: 0.7375222816399287
|
|
|
|
key: train_accuracy
|
|
value: [0.87868852 0.87540984 0.87540984 0.8852459 0.85901639 0.87868852
|
|
0.90819672 0.88852459 0.88852459 0.88235294]
|
|
|
|
mean value: 0.8820057859209257
|
|
|
|
key: test_fscore
|
|
value: [0.77777778 0.85714286 0.75 0.8 0.82051282 0.7804878
|
|
0.66666667 0.74285714 0.69565217 0.76470588]
|
|
|
|
mean value: 0.7655803126101298
|
|
|
|
key: train_fscore
|
|
value: [0.89337176 0.88823529 0.89017341 0.89795918 0.87315634 0.8914956
|
|
0.91764706 0.90116279 0.9 0.89534884]
|
|
|
|
mean value: 0.894855027620723
|
|
|
|
key: test_precision
|
|
value: [0.77777778 0.75 0.85714286 0.76190476 0.8 0.72727273
|
|
0.70588235 0.8125 0.59259259 0.8125 ]
|
|
|
|
mean value: 0.7597573069631893
|
|
|
|
key: train_precision
|
|
value: [0.86592179 0.87790698 0.86516854 0.875 0.86046512 0.87356322
|
|
0.9017341 0.87570621 0.88439306 0.875 ]
|
|
|
|
mean value: 0.8754859020768724
|
|
|
|
key: test_recall
|
|
value: [0.77777778 1. 0.66666667 0.84210526 0.84210526 0.84210526
|
|
0.63157895 0.68421053 0.84210526 0.72222222]
|
|
|
|
mean value: 0.7850877192982456
|
|
|
|
key: train_recall
|
|
value: [0.92261905 0.89880952 0.91666667 0.92215569 0.88622754 0.91017964
|
|
0.93413174 0.92814371 0.91616766 0.91666667]
|
|
|
|
mean value: 0.9151767892785857
|
|
|
|
key: test_roc_auc
|
|
value: [0.76388889 0.8125 0.77083333 0.75438596 0.7877193 0.72105263
|
|
0.64912281 0.74210526 0.55438596 0.76111111]
|
|
|
|
mean value: 0.7317105263157895
|
|
|
|
key: train_roc_auc
|
|
value: [0.87371828 0.87276243 0.87074209 0.8813677 0.85615725 0.87537968
|
|
0.90547167 0.88436171 0.88562006 0.87862319]
|
|
|
|
mean value: 0.8784204056410911
|
|
|
|
key: test_jcc
|
|
value: [0.63636364 0.75 0.6 0.66666667 0.69565217 0.64
|
|
0.5 0.59090909 0.53333333 0.61904762]
|
|
|
|
mean value: 0.623197252023339
|
|
|
|
key: train_jcc
|
|
value: [0.80729167 0.7989418 0.80208333 0.81481481 0.77486911 0.8042328
|
|
0.84782609 0.82010582 0.81818182 0.81052632]
|
|
|
|
mean value: 0.8098873568970696
|
|
|
|
MCC on Blind test: 0.47
|
|
|
|
Accuracy on Blind test: 0.74
|
|
|
|
Model_name: Ridge ClassifierCV
|
|
Model func: RidgeClassifierCV(cv=10)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifierCV(cv=10))])
|
|
|
|
key: fit_time
|
|
value: [0.25761247 0.2633729 0.2546823 0.28250599 0.3497107 0.19975376
|
|
0.16722083 0.16166186 0.19858789 0.29385448]
|
|
|
|
mean value: 0.24289631843566895
|
|
|
|
key: score_time
|
|
value: [0.02395988 0.02154922 0.02496672 0.02411437 0.0234642 0.01256752
|
|
0.02424121 0.01267743 0.01260996 0.0249579 ]
|
|
|
|
mean value: 0.02051084041595459
|
|
|
|
key: test_mcc
|
|
value: [0.52777778 0.60755744 0.53472222 0.58330485 0.4677202 0.45935257
|
|
0.17770466 0.39794149 0.13356983 0.57091142]
|
|
|
|
mean value: 0.44605624644321756
|
|
|
|
key: train_mcc
|
|
value: [0.75505307 0.66121339 0.64120778 0.65518118 0.66174647 0.66264256
|
|
0.67562928 0.68187683 0.77477818 0.6565554 ]
|
|
|
|
mean value: 0.6825884144508163
|
|
|
|
key: test_accuracy
|
|
value: [0.76470588 0.79411765 0.76470588 0.79411765 0.73529412 0.73529412
|
|
0.58823529 0.70588235 0.58823529 0.78787879]
|
|
|
|
mean value: 0.7258467023172905
|
|
|
|
key: train_accuracy
|
|
value: [0.87868852 0.83278689 0.82295082 0.8295082 0.83278689 0.83278689
|
|
0.83934426 0.84262295 0.88852459 0.83006536]
|
|
|
|
mean value: 0.8430065359477124
|
|
|
|
key: test_fscore
|
|
value: [0.77777778 0.82926829 0.76470588 0.82926829 0.75675676 0.7804878
|
|
0.61111111 0.75 0.69565217 0.81081081]
|
|
|
|
mean value: 0.7605838902966344
|
|
|
|
key: train_fscore
|
|
value: /home/tanu/git/LSHTM_analysis/scripts/ml/./pnca_cd_8020.py:115: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
baseline_CT.sort_values(by = ['test_mcc'], ascending = False, inplace = True)
|
|
/home/tanu/git/LSHTM_analysis/scripts/ml/./pnca_cd_8020.py:118: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
baseline_BT.sort_values(by = ['bts_mcc'], ascending = False, inplace = True)
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[0.89337176 0.85217391 0.84393064 0.84883721 0.85043988 0.85386819
|
|
0.85878963 0.86046512 0.9 0.85142857]
|
|
|
|
mean value: 0.8613304906717252
|
|
|
|
key: test_precision
|
|
value: [0.77777778 0.73913043 0.8125 0.77272727 0.77777778 0.72727273
|
|
0.64705882 0.71428571 0.59259259 0.78947368]
|
|
|
|
mean value: 0.735059680495641
|
|
|
|
key: train_precision
|
|
value: [0.86592179 0.83050847 0.82022472 0.82485876 0.83333333 0.81868132
|
|
0.82777778 0.83615819 0.88439306 0.81868132]
|
|
|
|
mean value: 0.8360538742596998
|
|
|
|
key: test_recall
|
|
value: [0.77777778 0.94444444 0.72222222 0.89473684 0.73684211 0.84210526
|
|
0.57894737 0.78947368 0.84210526 0.83333333]
|
|
|
|
mean value: 0.7961988304093567
|
|
|
|
key: train_recall
|
|
value: [0.92261905 0.875 0.86904762 0.8742515 0.86826347 0.89221557
|
|
0.89221557 0.88622754 0.91616766 0.88690476]
|
|
|
|
mean value: 0.8882912745936699
|
|
|
|
key: test_roc_auc
|
|
value: [0.76388889 0.78472222 0.76736111 0.78070175 0.73508772 0.72105263
|
|
0.58947368 0.69473684 0.55438596 0.78333333]
|
|
|
|
mean value: 0.7174744152046784
|
|
|
|
key: train_roc_auc
|
|
value: [0.87371828 0.82801095 0.81773549 0.82480691 0.82905927 0.82654257
|
|
0.83378894 0.83804131 0.88562006 0.82388716]
|
|
|
|
mean value: 0.838121094823546
|
|
|
|
key: test_jcc
|
|
value: [0.63636364 0.70833333 0.61904762 0.70833333 0.60869565 0.64
|
|
0.44 0.6 0.53333333 0.68181818]
|
|
|
|
mean value: 0.617592508940335
|
|
|
|
key: train_jcc
|
|
value: [0.80729167 0.74242424 0.73 0.73737374 0.73979592 0.745
|
|
0.75252525 0.75510204 0.81818182 0.74129353]
|
|
|
|
mean value: 0.7568988208693699
|
|
|
|
MCC on Blind test: 0.55
|
|
|
|
Accuracy on Blind test: 0.78
|
|
|
|
Model_name: Logistic Regression
|
|
Model func: LogisticRegression(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegression(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.04396796 0.03654957 0.06763983 0.06130362 0.08093858 0.14613819
|
|
0.10202789 0.06165719 0.03520894 0.03634906]
|
|
|
|
mean value: 0.06717808246612549
|
|
|
|
key: score_time
|
|
value: [0.01707697 0.02061653 0.02115917 0.01255584 0.01231718 0.02237773
|
|
0.01698637 0.0154736 0.01535988 0.01562119]
|
|
|
|
mean value: 0.016954445838928224
|
|
|
|
key: test_mcc
|
|
value: [0.68803296 0.47633051 0.51319869 0.4633451 0.31339521 0.56934383
|
|
0.35104619 0.7888597 0.41299552 0.73099415]
|
|
|
|
mean value: 0.5307541863812678
|
|
|
|
key: train_mcc
|
|
value: [0.68862275 0.70079986 0.7074779 0.74329912 0.7611919 0.71347093
|
|
0.71974618 0.68954947 0.70853021 0.67766887]
|
|
|
|
mean value: 0.7110357185825491
|
|
|
|
key: test_accuracy
|
|
value: [0.84210526 0.73684211 0.75675676 0.72972973 0.64864865 0.78378378
|
|
0.67567568 0.89189189 0.7027027 0.86486486]
|
|
|
|
mean value: 0.7633001422475106
|
|
|
|
key: train_accuracy
|
|
value: [0.84431138 0.8502994 0.85373134 0.87164179 0.88059701 0.85671642
|
|
0.85970149 0.84477612 0.85373134 0.83880597]
|
|
|
|
mean value: 0.8554312270980428
|
|
|
|
key: test_fscore
|
|
value: [0.85 0.72222222 0.74285714 0.73684211 0.68292683 0.76470588
|
|
0.7 0.88888889 0.68571429 0.86486486]
|
|
|
|
mean value: 0.7639022221431796
|
|
|
|
key: train_fscore
|
|
value: [0.84431138 0.85207101 0.85373134 0.87164179 0.88095238 0.85798817
|
|
0.86135693 0.84431138 0.85714286 0.83928571]
|
|
|
|
mean value: 0.8562792944951354
|
|
|
|
key: test_precision
|
|
value: [0.80952381 0.76470588 0.76470588 0.7 0.60869565 0.8125
|
|
0.66666667 0.94117647 0.75 0.88888889]
|
|
|
|
mean value: 0.7706863252547396
|
|
|
|
key: train_precision
|
|
value: [0.84431138 0.84210526 0.85628743 0.8742515 0.88095238 0.85294118
|
|
0.84883721 0.84431138 0.83522727 0.83431953]
|
|
|
|
mean value: 0.8513544505884387
|
|
|
|
key: test_recall
|
|
value: [0.89473684 0.68421053 0.72222222 0.77777778 0.77777778 0.72222222
|
|
0.73684211 0.84210526 0.63157895 0.84210526]
|
|
|
|
mean value: 0.763157894736842
|
|
|
|
key: train_recall
|
|
value: [0.84431138 0.86227545 0.85119048 0.86904762 0.88095238 0.86309524
|
|
0.8742515 0.84431138 0.88023952 0.84431138]
|
|
|
|
mean value: 0.861398631308811
|
|
|
|
key: test_roc_auc
|
|
value: [0.84210526 0.73684211 0.75584795 0.73099415 0.65204678 0.78216374
|
|
0.67397661 0.89327485 0.70467836 0.86549708]
|
|
|
|
mean value: 0.7637426900584795
|
|
|
|
key: train_roc_auc
|
|
value: [0.84431138 0.8502994 0.85373895 0.87164956 0.88059595 0.85669732
|
|
0.8597448 0.84477474 0.85381024 0.83882236]
|
|
|
|
mean value: 0.8554444682064443
|
|
|
|
key: test_jcc
|
|
value: [0.73913043 0.56521739 0.59090909 0.58333333 0.51851852 0.61904762
|
|
0.53846154 0.8 0.52173913 0.76190476]
|
|
|
|
mean value: 0.6238261818696601
|
|
|
|
key: train_jcc
|
|
value: [0.73056995 0.74226804 0.74479167 0.77248677 0.78723404 0.75129534
|
|
0.75647668 0.73056995 0.75 0.72307692]
|
|
|
|
mean value: 0.7488769363119113
|
|
|
|
MCC on Blind test: 0.5
|
|
|
|
Accuracy on Blind test: 0.75
|
|
|
|
Model_name: Logistic RegressionCV
|
|
Model func: LogisticRegressionCV(random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegressionCV(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.88585067 0.84192276 1.05506897 0.93717885 1.3041873 1.291399
|
|
0.83234978 0.92508912 1.09525394 0.87185836]
|
|
|
|
mean value: 1.0040158748626709
|
|
|
|
key: score_time
|
|
value: [0.01258469 0.01221967 0.0127883 0.01269531 0.01253319 0.01251841
|
|
0.01307392 0.01263189 0.01253581 0.01256847]
|
|
|
|
mean value: 0.012614965438842773
|
|
|
|
key: test_mcc
|
|
value: [0.80757285 0.42163702 0.51793973 0.53638795 0.53638795 0.56725146
|
|
0.45906433 0.7888597 0.4633451 0.56934383]
|
|
|
|
mean value: 0.5667789920768883
|
|
|
|
key: train_mcc
|
|
value: [0.57486061 0.69492232 0.64216176 0.60007131 0.64182084 0.6360687
|
|
0.62389507 0.58228165 0.66613183 0.60602388]
|
|
|
|
mean value: 0.6268237981688066
|
|
|
|
key: test_accuracy
|
|
value: [0.89473684 0.71052632 0.75675676 0.75675676 0.75675676 0.78378378
|
|
0.72972973 0.89189189 0.72972973 0.78378378]
|
|
|
|
mean value: 0.7794452347083927
|
|
|
|
key: train_accuracy
|
|
value: [0.78742515 0.84730539 0.82089552 0.8 0.82089552 0.81791045
|
|
0.8119403 0.79104478 0.83283582 0.80298507]
|
|
|
|
mean value: 0.8133238001608722
|
|
|
|
key: test_fscore
|
|
value: [0.9047619 0.7027027 0.72727273 0.7804878 0.7804878 0.77777778
|
|
0.73684211 0.88888889 0.72222222 0.8 ]
|
|
|
|
mean value: 0.7821443938645479
|
|
|
|
key: train_fscore
|
|
value: [0.7880597 0.84955752 0.8245614 0.80235988 0.82248521 0.82111437
|
|
0.8119403 0.79289941 0.83529412 0.80357143]
|
|
|
|
mean value: 0.8151843338743134
|
|
|
|
key: test_precision
|
|
value: [0.82608696 0.72222222 0.8 0.69565217 0.69565217 0.77777778
|
|
0.73684211 0.94117647 0.76470588 0.76190476]
|
|
|
|
mean value: 0.7722020524456922
|
|
|
|
key: train_precision
|
|
value: [0.78571429 0.8372093 0.81034483 0.79532164 0.81764706 0.80924855
|
|
0.80952381 0.78362573 0.82080925 0.79881657]
|
|
|
|
mean value: 0.806826102391001
|
|
|
|
key: test_recall
|
|
value: [1. 0.68421053 0.66666667 0.88888889 0.88888889 0.77777778
|
|
0.73684211 0.84210526 0.68421053 0.84210526]
|
|
|
|
mean value: 0.8011695906432749
|
|
|
|
key: train_recall
|
|
value: [0.79041916 0.86227545 0.83928571 0.80952381 0.82738095 0.83333333
|
|
0.81437126 0.80239521 0.8502994 0.80838323]
|
|
|
|
mean value: 0.823766752209866
|
|
|
|
key: test_roc_auc
|
|
value: [0.89473684 0.71052632 0.75438596 0.76023392 0.76023392 0.78362573
|
|
0.72953216 0.89327485 0.73099415 0.78216374]
|
|
|
|
mean value: 0.7799707602339181
|
|
|
|
key: train_roc_auc
|
|
value: [0.78742515 0.84730539 0.82084046 0.79997149 0.8208761 0.81786427
|
|
0.81194753 0.79107856 0.8328878 0.80300114]
|
|
|
|
mean value: 0.8133197889934417
|
|
|
|
key: test_jcc
|
|
value: [0.82608696 0.54166667 0.57142857 0.64 0.64 0.63636364
|
|
0.58333333 0.8 0.56521739 0.66666667]
|
|
|
|
mean value: 0.6470763222284961
|
|
|
|
key: train_jcc
|
|
value: [0.65024631 0.73846154 0.70149254 0.66995074 0.69849246 0.69651741
|
|
0.68341709 0.65686275 0.71717172 0.67164179]
|
|
|
|
mean value: 0.6884254334098496
|
|
|
|
MCC on Blind test: 0.52
|
|
|
|
Accuracy on Blind test: 0.76
|
|
|
|
Model_name: Gaussian NB
|
|
Model func: GaussianNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01522541 0.01153207 0.01446152 0.01279211 0.01003456 0.00969553
|
|
0.00969219 0.01678538 0.00997353 0.0098846 ]
|
|
|
|
mean value: 0.012007689476013184
|
|
|
|
key: score_time
|
|
value: [0.01304626 0.0097332 0.01486111 0.01349115 0.00920463 0.00899363
|
|
0.01378655 0.01271343 0.00948763 0.00904417]
|
|
|
|
mean value: 0.011436176300048829
|
|
|
|
key: test_mcc
|
|
value: [0.43643578 0.1132277 0.36315314 0.39648395 0.27817826 0.26327408
|
|
0.35558302 0.56934383 0.18980224 0.56725146]
|
|
|
|
mean value: 0.3532733473857496
|
|
|
|
key: train_mcc
|
|
value: [0.3843783 0.42048645 0.40067849 0.41081153 0.40918659 0.40587205
|
|
0.46455426 0.38525508 0.43524944 0.4649599 ]
|
|
|
|
mean value: 0.4181432104119126
|
|
|
|
key: test_accuracy
|
|
value: [0.71052632 0.55263158 0.67567568 0.67567568 0.62162162 0.62162162
|
|
0.67567568 0.78378378 0.59459459 0.78378378]
|
|
|
|
mean value: 0.6695590327169274
|
|
|
|
key: train_accuracy
|
|
value: [0.68263473 0.7005988 0.69253731 0.69552239 0.69552239 0.68059701
|
|
0.72537313 0.68358209 0.70746269 0.73134328]
|
|
|
|
mean value: 0.6995173831441595
|
|
|
|
key: test_fscore
|
|
value: [0.74418605 0.62222222 0.7 0.72727273 0.68181818 0.66666667
|
|
0.71428571 0.8 0.65116279 0.78947368]
|
|
|
|
mean value: 0.7097088033685341
|
|
|
|
key: train_fscore
|
|
value: [0.7253886 0.73958333 0.7310705 0.7371134 0.7357513 0.7409201
|
|
0.7540107 0.72395833 0.74479167 0.74285714]
|
|
|
|
mean value: 0.7375445062748406
|
|
|
|
key: test_precision
|
|
value: [0.66666667 0.53846154 0.63636364 0.61538462 0.57692308 0.58333333
|
|
0.65217391 0.76190476 0.58333333 0.78947368]
|
|
|
|
mean value: 0.6404018559624967
|
|
|
|
key: train_precision
|
|
value: [0.63926941 0.65437788 0.65116279 0.65 0.65137615 0.6244898
|
|
0.68115942 0.640553 0.65898618 0.71038251]
|
|
|
|
mean value: 0.6561757124440029
|
|
|
|
key: test_recall
|
|
value: [0.84210526 0.73684211 0.77777778 0.88888889 0.83333333 0.77777778
|
|
0.78947368 0.84210526 0.73684211 0.78947368]
|
|
|
|
mean value: 0.8014619883040935
|
|
|
|
key: train_recall
|
|
value: [0.83832335 0.8502994 0.83333333 0.85119048 0.8452381 0.91071429
|
|
0.84431138 0.83233533 0.85628743 0.77844311]
|
|
|
|
mean value: 0.844047619047619
|
|
|
|
key: test_roc_auc
|
|
value: [0.71052632 0.55263158 0.67836257 0.68128655 0.62719298 0.62573099
|
|
0.67251462 0.78216374 0.59064327 0.78362573]
|
|
|
|
mean value: 0.67046783625731
|
|
|
|
key: train_roc_auc
|
|
value: [0.68263473 0.7005988 0.69211577 0.69505632 0.69507414 0.67990804
|
|
0.72572712 0.68402481 0.70790562 0.73148346]
|
|
|
|
mean value: 0.699452879954377
|
|
|
|
key: test_jcc
|
|
value: [0.59259259 0.4516129 0.53846154 0.57142857 0.51724138 0.5
|
|
0.55555556 0.66666667 0.48275862 0.65217391]
|
|
|
|
mean value: 0.552849174097421
|
|
|
|
key: train_jcc
|
|
value: [0.56910569 0.58677686 0.57613169 0.58367347 0.58196721 0.58846154
|
|
0.60515021 0.56734694 0.593361 0.59090909]
|
|
|
|
mean value: 0.5842883698895387
|
|
|
|
MCC on Blind test: 0.4
|
|
|
|
Accuracy on Blind test: 0.71
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01714635 0.01055765 0.01006603 0.01036644 0.01142645 0.0168221
|
|
0.01001072 0.01058173 0.00991893 0.01728797]
|
|
|
|
mean value: 0.012418437004089355
|
|
|
|
key: score_time
|
|
value: [0.01294327 0.00889134 0.00889206 0.00903654 0.01446939 0.0122788
|
|
0.00896454 0.00921202 0.01486659 0.01152539]
|
|
|
|
mean value: 0.011107993125915528
|
|
|
|
key: test_mcc
|
|
value: [0.63960215 0.37686733 0.30307132 0.24633537 0.09678053 0.29824561
|
|
0.35484024 0.73099415 0.40469382 0.29766651]
|
|
|
|
mean value: 0.37490970420832104
|
|
|
|
key: train_mcc
|
|
value: [0.49102677 0.53354668 0.53432421 0.51049724 0.5351962 0.5223838
|
|
0.51643142 0.50447336 0.50512364 0.49887672]
|
|
|
|
mean value: 0.5151880034378592
|
|
|
|
key: test_accuracy
|
|
value: [0.81578947 0.68421053 0.64864865 0.62162162 0.54054054 0.64864865
|
|
0.67567568 0.86486486 0.7027027 0.64864865]
|
|
|
|
mean value: 0.6851351351351351
|
|
|
|
key: train_accuracy
|
|
value: [0.74550898 0.76646707 0.76716418 0.75522388 0.76716418 0.76119403
|
|
0.75820896 0.75223881 0.75223881 0.74925373]
|
|
|
|
mean value: 0.757466261506837
|
|
|
|
key: test_fscore
|
|
value: [0.82926829 0.64705882 0.58064516 0.63157895 0.60465116 0.64864865
|
|
0.66666667 0.86486486 0.71794872 0.68292683]
|
|
|
|
mean value: 0.6874258115058971
|
|
|
|
key: train_fscore
|
|
value: [0.74626866 0.77192982 0.76785714 0.75449102 0.77456647 0.76190476
|
|
0.75820896 0.75075075 0.74461538 0.75294118]
|
|
|
|
mean value: 0.7583534145052842
|
|
|
|
key: test_precision
|
|
value: [0.77272727 0.73333333 0.69230769 0.6 0.52 0.63157895
|
|
0.70588235 0.88888889 0.7 0.63636364]
|
|
|
|
mean value: 0.6881082123930421
|
|
|
|
key: train_precision
|
|
value: [0.74404762 0.75428571 0.76785714 0.75903614 0.75280899 0.76190476
|
|
0.75595238 0.75301205 0.76582278 0.73988439]
|
|
|
|
mean value: 0.7554611978456459
|
|
|
|
key: test_recall
|
|
value: [0.89473684 0.57894737 0.5 0.66666667 0.72222222 0.66666667
|
|
0.63157895 0.84210526 0.73684211 0.73684211]
|
|
|
|
mean value: 0.6976608187134503
|
|
|
|
key: train_recall
|
|
value: [0.74850299 0.79041916 0.76785714 0.75 0.79761905 0.76190476
|
|
0.76047904 0.74850299 0.7245509 0.76646707]
|
|
|
|
mean value: 0.7616303108069575
|
|
|
|
key: test_roc_auc
|
|
value: [0.81578947 0.68421053 0.64473684 0.62280702 0.54532164 0.64912281
|
|
0.67690058 0.86549708 0.70175439 0.64619883]
|
|
|
|
mean value: 0.685233918128655
|
|
|
|
key: train_roc_auc
|
|
value: [0.74550898 0.76646707 0.7671621 0.75523952 0.767073 0.7611919
|
|
0.75821571 0.75222769 0.7521564 0.74930496]
|
|
|
|
mean value: 0.7574547333903622
|
|
|
|
key: test_jcc
|
|
value: [0.70833333 0.47826087 0.40909091 0.46153846 0.43333333 0.48
|
|
0.5 0.76190476 0.56 0.51851852]
|
|
|
|
mean value: 0.5310980187284535
|
|
|
|
key: train_jcc
|
|
value: [0.5952381 0.62857143 0.62318841 0.60576923 0.63207547 0.61538462
|
|
0.61057692 0.60096154 0.59313725 0.60377358]
|
|
|
|
mean value: 0.6108676548804667
|
|
|
|
MCC on Blind test: 0.4
|
|
|
|
Accuracy on Blind test: 0.71
|
|
|
|
Model_name: K-Nearest Neighbors
|
|
Model func: KNeighborsClassifier()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', KNeighborsClassifier())])
|
|
|
|
key: fit_time
|
|
value: [0.00950575 0.01038885 0.01083946 0.01063919 0.01487327 0.01237392
|
|
0.01116276 0.01098061 0.0092721 0.01092935]
|
|
|
|
mean value: 0.011096525192260741
|
|
|
|
key: score_time
|
|
value: [0.01754284 0.01708961 0.01759601 0.01831388 0.04157138 0.0168407
|
|
0.02050781 0.01795244 0.0174098 0.01833153]
|
|
|
|
mean value: 0.02031559944152832
|
|
|
|
key: test_mcc
|
|
value: [0.05547002 0.31980107 0.13274856 0.24633537 0.02359974 0.51793973
|
|
0.14287993 0.29824561 0.35484024 0.35484024]
|
|
|
|
mean value: 0.24467005087899887
|
|
|
|
key: train_mcc
|
|
value: [0.59298449 0.5513828 0.53461759 0.57014542 0.5587799 0.56461424
|
|
0.57611834 0.54035652 0.59408305 0.52837183]
|
|
|
|
mean value: 0.5611454187639617
|
|
|
|
key: test_accuracy
|
|
value: [0.52631579 0.65789474 0.56756757 0.62162162 0.51351351 0.75675676
|
|
0.56756757 0.64864865 0.67567568 0.67567568]
|
|
|
|
mean value: 0.6211237553342817
|
|
|
|
key: train_accuracy
|
|
value: [0.79640719 0.7754491 0.76716418 0.78507463 0.77910448 0.78208955
|
|
0.7880597 0.77014925 0.79701493 0.7641791 ]
|
|
|
|
mean value: 0.7804692108320672
|
|
|
|
key: test_fscore
|
|
value: [0.59090909 0.62857143 0.52941176 0.63157895 0.47058824 0.72727273
|
|
0.52941176 0.64864865 0.66666667 0.66666667]
|
|
|
|
mean value: 0.6089725940809532
|
|
|
|
key: train_fscore
|
|
value: [0.79393939 0.78005865 0.77192982 0.78571429 0.78488372 0.78717201
|
|
0.78678679 0.7673716 0.79761905 0.7641791 ]
|
|
|
|
mean value: 0.7819654427925422
|
|
|
|
key: test_precision
|
|
value: [0.52 0.6875 0.5625 0.6 0.5 0.8
|
|
0.6 0.66666667 0.70588235 0.70588235]
|
|
|
|
mean value: 0.6348431372549019
|
|
|
|
key: train_precision
|
|
value: [0.80368098 0.76436782 0.75862069 0.78571429 0.76704545 0.77142857
|
|
0.78915663 0.77439024 0.79289941 0.76190476]
|
|
|
|
mean value: 0.7769208839627779
|
|
|
|
key: test_recall
|
|
value: [0.68421053 0.57894737 0.5 0.66666667 0.44444444 0.66666667
|
|
0.47368421 0.63157895 0.63157895 0.63157895]
|
|
|
|
mean value: 0.5909356725146199
|
|
|
|
key: train_recall
|
|
value: [0.78443114 0.79640719 0.78571429 0.78571429 0.80357143 0.80357143
|
|
0.78443114 0.76047904 0.80239521 0.76646707]
|
|
|
|
mean value: 0.7873182207014542
|
|
|
|
key: test_roc_auc
|
|
value: [0.52631579 0.65789474 0.56578947 0.62280702 0.51169591 0.75438596
|
|
0.57017544 0.64912281 0.67690058 0.67690058]
|
|
|
|
mean value: 0.6211988304093568
|
|
|
|
key: train_roc_auc
|
|
value: [0.79640719 0.7754491 0.76710864 0.78507271 0.77903122 0.78202524
|
|
0.7880489 0.77012047 0.79703094 0.76418591]
|
|
|
|
mean value: 0.7804480325064157
|
|
|
|
key: test_jcc
|
|
value: [0.41935484 0.45833333 0.36 0.46153846 0.30769231 0.57142857
|
|
0.36 0.48 0.5 0.5 ]
|
|
|
|
mean value: 0.44183475127023514
|
|
|
|
key: train_jcc
|
|
value: [0.65829146 0.63942308 0.62857143 0.64705882 0.64593301 0.64903846
|
|
0.64851485 0.62254902 0.66336634 0.61835749]
|
|
|
|
mean value: 0.6421103957852239
|
|
|
|
MCC on Blind test: 0.17
|
|
|
|
Accuracy on Blind test: 0.59
|
|
|
|
Model_name: SVM
|
|
Model func: SVC(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SVC(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02105331 0.01949525 0.0189383 0.0195446 0.01917481 0.01617718
|
|
0.01981163 0.01758862 0.01932025 0.01966119]
|
|
|
|
mean value: 0.01907651424407959
|
|
|
|
key: score_time
|
|
value: [0.01235318 0.01200557 0.01184678 0.01205277 0.0112555 0.01156902
|
|
0.01212907 0.01227331 0.01167321 0.01169038]
|
|
|
|
mean value: 0.01188488006591797
|
|
|
|
key: test_mcc
|
|
value: [0.54554473 0.21081851 0.46019501 0.57184997 0.47328975 0.56934383
|
|
0.40780312 0.7888597 0.40643275 0.62280702]
|
|
|
|
mean value: 0.505694437801481
|
|
|
|
key: train_mcc
|
|
value: [0.67684089 0.72642872 0.71343028 0.71349136 0.72541262 0.66023295
|
|
0.74396311 0.67172907 0.70171125 0.69564753]
|
|
|
|
mean value: 0.7028887772822439
|
|
|
|
key: test_accuracy
|
|
value: [0.76315789 0.60526316 0.72972973 0.78378378 0.72972973 0.78378378
|
|
0.7027027 0.89189189 0.7027027 0.81081081]
|
|
|
|
mean value: 0.7503556187766715
|
|
|
|
key: train_accuracy
|
|
value: [0.83832335 0.86227545 0.85671642 0.85671642 0.86268657 0.82985075
|
|
0.87164179 0.8358209 0.85074627 0.84776119]
|
|
|
|
mean value: 0.8512539100902672
|
|
|
|
key: test_fscore
|
|
value: [0.79069767 0.61538462 0.70588235 0.78947368 0.75 0.76470588
|
|
0.73170732 0.88888889 0.7027027 0.81081081]
|
|
|
|
mean value: 0.7550253928783437
|
|
|
|
key: train_fscore
|
|
value: [0.83636364 0.86705202 0.85714286 0.85628743 0.86390533 0.83381924
|
|
0.87390029 0.83383686 0.85207101 0.84866469]
|
|
|
|
mean value: 0.852304335480951
|
|
|
|
key: test_precision
|
|
value: [0.70833333 0.6 0.75 0.75 0.68181818 0.8125
|
|
0.68181818 0.94117647 0.72222222 0.83333333]
|
|
|
|
mean value: 0.7481201723113488
|
|
|
|
key: train_precision
|
|
value: [0.84662577 0.83798883 0.85714286 0.86144578 0.85882353 0.81714286
|
|
0.85632184 0.84146341 0.84210526 0.84117647]
|
|
|
|
mean value: 0.8460236607977554
|
|
|
|
key: test_recall
|
|
value: [0.89473684 0.63157895 0.66666667 0.83333333 0.83333333 0.72222222
|
|
0.78947368 0.84210526 0.68421053 0.78947368]
|
|
|
|
mean value: 0.7687134502923977
|
|
|
|
key: train_recall
|
|
value: [0.82634731 0.89820359 0.85714286 0.85119048 0.86904762 0.85119048
|
|
0.89221557 0.82634731 0.86227545 0.85628743]
|
|
|
|
mean value: 0.8590248075278015
|
|
|
|
key: test_roc_auc
|
|
value: [0.76315789 0.60526316 0.72807018 0.78508772 0.73245614 0.78216374
|
|
0.7002924 0.89327485 0.70321637 0.81140351]
|
|
|
|
mean value: 0.750438596491228
|
|
|
|
key: train_roc_auc
|
|
value: [0.83832335 0.86227545 0.85671514 0.85673296 0.86266752 0.82978685
|
|
0.87170302 0.8357927 0.85078058 0.84778657]
|
|
|
|
mean value: 0.8512564157399487
|
|
|
|
key: test_jcc
|
|
value: [0.65384615 0.44444444 0.54545455 0.65217391 0.6 0.61904762
|
|
0.57692308 0.8 0.54166667 0.68181818]
|
|
|
|
mean value: 0.6115374601244167
|
|
|
|
key: train_jcc
|
|
value: [0.71875 0.76530612 0.75 0.7486911 0.76041667 0.715
|
|
0.77604167 0.71502591 0.74226804 0.7371134 ]
|
|
|
|
mean value: 0.7428612905293472
|
|
|
|
MCC on Blind test: 0.4
|
|
|
|
Accuracy on Blind test: 0.71
|
|
|
|
Model_name: MLP
|
|
Model func: MLPClassifier(max_iter=500, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MLPClassifier(max_iter=500, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.29484487 1.35141802 1.39955044 1.25552177 1.45511603 1.26184177
|
|
1.40923262 1.45287323 1.29063821 1.32422447]
|
|
|
|
mean value: 1.3495261430740357
|
|
|
|
key: score_time
|
|
value: [0.01481819 0.01488948 0.01498389 0.01518703 0.01499796 0.01529431
|
|
0.01510549 0.01532483 0.01228356 0.01522923]
|
|
|
|
mean value: 0.014811396598815918
|
|
|
|
key: test_mcc
|
|
value: [0.63245553 0.26315789 0.57184997 0.24633537 0.31339521 0.51319869
|
|
0.35484024 0.56934383 0.52214434 0.74044197]
|
|
|
|
mean value: 0.47271630353911137
|
|
|
|
key: train_mcc
|
|
value: [0.97021644 0.97021644 0.98224601 0.97639598 0.98812962 0.96424625
|
|
0.98813046 0.97639931 0.97639931 0.97639931]
|
|
|
|
mean value: 0.97687791348353
|
|
|
|
key: test_accuracy
|
|
value: [0.81578947 0.63157895 0.78378378 0.62162162 0.64864865 0.75675676
|
|
0.67567568 0.78378378 0.75675676 0.86486486]
|
|
|
|
mean value: 0.7339260312944523
|
|
|
|
key: train_accuracy
|
|
value: [0.98502994 0.98502994 0.99104478 0.9880597 0.99402985 0.98208955
|
|
0.99402985 0.9880597 0.9880597 0.9880597 ]
|
|
|
|
mean value: 0.9883492716060417
|
|
|
|
key: test_fscore
|
|
value: [0.82051282 0.63157895 0.78947368 0.63157895 0.68292683 0.74285714
|
|
0.66666667 0.8 0.74285714 0.85714286]
|
|
|
|
mean value: 0.7365595038252292
|
|
|
|
key: train_fscore
|
|
value: [0.9851632 0.9851632 0.99115044 0.98823529 0.99408284 0.98224852
|
|
0.99404762 0.98816568 0.98816568 0.98816568]
|
|
|
|
mean value: 0.9884588167505555
|
|
|
|
key: test_precision
|
|
value: [0.8 0.63157895 0.75 0.6 0.60869565 0.76470588
|
|
0.70588235 0.76190476 0.8125 0.9375 ]
|
|
|
|
mean value: 0.7372767596741213
|
|
|
|
key: train_precision
|
|
value: [0.97647059 0.97647059 0.98245614 0.97674419 0.98823529 0.97647059
|
|
0.98816568 0.97660819 0.97660819 0.97660819]
|
|
|
|
mean value: 0.97948376270978
|
|
|
|
key: test_recall
|
|
value: [0.84210526 0.63157895 0.83333333 0.66666667 0.77777778 0.72222222
|
|
0.63157895 0.84210526 0.68421053 0.78947368]
|
|
|
|
mean value: 0.7421052631578947
|
|
|
|
key: train_recall
|
|
value: [0.99401198 0.99401198 1. 1. 1. 0.98809524
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9976119190191046
|
|
|
|
key: test_roc_auc
|
|
value: [0.81578947 0.63157895 0.78508772 0.62280702 0.65204678 0.75584795
|
|
0.67690058 0.78216374 0.75877193 0.86695906]
|
|
|
|
mean value: 0.7347953216374269
|
|
|
|
key: train_roc_auc
|
|
value: [0.98502994 0.98502994 0.99101796 0.98802395 0.99401198 0.98207157
|
|
0.99404762 0.98809524 0.98809524 0.98809524]
|
|
|
|
mean value: 0.9883518676931851
|
|
|
|
key: test_jcc
|
|
value: [0.69565217 0.46153846 0.65217391 0.46153846 0.51851852 0.59090909
|
|
0.5 0.66666667 0.59090909 0.75 ]
|
|
|
|
mean value: 0.5887906377036812
|
|
|
|
key: train_jcc
|
|
value: [0.97076023 0.97076023 0.98245614 0.97674419 0.98823529 0.96511628
|
|
0.98816568 0.97660819 0.97660819 0.97660819]
|
|
|
|
mean value: 0.9772062609297942
|
|
|
|
MCC on Blind test: 0.48
|
|
|
|
Accuracy on Blind test: 0.74
|
|
|
|
Model_name: Decision Tree
|
|
Model func: DecisionTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', DecisionTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02758646 0.02779484 0.0250752 0.02647305 0.02887154 0.02721882
|
|
0.0267508 0.02393889 0.02361465 0.02253938]
|
|
|
|
mean value: 0.025986361503601074
|
|
|
|
key: score_time
|
|
value: [0.01208806 0.00973105 0.01033592 0.01000762 0.0095365 0.00903392
|
|
0.00990868 0.00987482 0.00969052 0.00939345]
|
|
|
|
mean value: 0.009960055351257324
|
|
|
|
key: test_mcc
|
|
value: [0.42163702 0.47633051 0.40469382 0.25301653 0.35484024 0.52214434
|
|
0.24189738 0.51319869 0.45906433 0.6754386 ]
|
|
|
|
mean value: 0.43222614545255783
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.71052632 0.73684211 0.7027027 0.62162162 0.67567568 0.75675676
|
|
0.62162162 0.75675676 0.72972973 0.83783784]
|
|
|
|
mean value: 0.7150071123755334
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.7027027 0.72222222 0.68571429 0.65 0.68421053 0.76923077
|
|
0.65 0.76923077 0.73684211 0.84210526]
|
|
|
|
mean value: 0.7212258643837591
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.72222222 0.76470588 0.70588235 0.59090909 0.65 0.71428571
|
|
0.61904762 0.75 0.73684211 0.84210526]
|
|
|
|
mean value: 0.7096000250179817
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.68421053 0.68421053 0.66666667 0.72222222 0.72222222 0.83333333
|
|
0.68421053 0.78947368 0.73684211 0.84210526]
|
|
|
|
mean value: 0.7365497076023392
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.71052632 0.73684211 0.70175439 0.62426901 0.67690058 0.75877193
|
|
0.61988304 0.75584795 0.72953216 0.8377193 ]
|
|
|
|
mean value: 0.7152046783625732
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.54166667 0.56521739 0.52173913 0.48148148 0.52 0.625
|
|
0.48148148 0.625 0.58333333 0.72727273]
|
|
|
|
mean value: 0.567219221197482
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.53
|
|
|
|
Accuracy on Blind test: 0.76
|
|
|
|
Model_name: Extra Trees
|
|
Model func: ExtraTreesClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreesClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.12596893 0.12527752 0.12332964 0.11733294 0.11807919 0.11906362
|
|
0.1212008 0.122576 0.12278318 0.1255846 ]
|
|
|
|
mean value: 0.12211964130401612
|
|
|
|
key: score_time
|
|
value: [0.01933122 0.01941299 0.01862574 0.01779246 0.0193646 0.01775312
|
|
0.01964116 0.01883388 0.01904368 0.01940775]
|
|
|
|
mean value: 0.0189206600189209
|
|
|
|
key: test_mcc
|
|
value: [0.2773501 0.31980107 0.51319869 0.60308132 0.36315314 0.62807634
|
|
0.25301653 0.73020842 0.4633451 0.78362573]
|
|
|
|
mean value: 0.4934856445692557
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.63157895 0.65789474 0.75675676 0.78378378 0.67567568 0.81081081
|
|
0.62162162 0.86486486 0.72972973 0.89189189]
|
|
|
|
mean value: 0.7424608819345662
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.68181818 0.62857143 0.74285714 0.80952381 0.7 0.78787879
|
|
0.58823529 0.87179487 0.72222222 0.89473684]
|
|
|
|
mean value: 0.7427638580889355
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.6 0.6875 0.76470588 0.70833333 0.63636364 0.86666667
|
|
0.66666667 0.85 0.76470588 0.89473684]
|
|
|
|
mean value: 0.7439678909841448
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.78947368 0.57894737 0.72222222 0.94444444 0.77777778 0.72222222
|
|
0.52631579 0.89473684 0.68421053 0.89473684]
|
|
|
|
mean value: 0.7535087719298246
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.63157895 0.65789474 0.75584795 0.7880117 0.67836257 0.80847953
|
|
0.62426901 0.86403509 0.73099415 0.89181287]
|
|
|
|
mean value: 0.7431286549707602
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.51724138 0.45833333 0.59090909 0.68 0.53846154 0.65
|
|
0.41666667 0.77272727 0.56521739 0.80952381]
|
|
|
|
mean value: 0.5999080482236404
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.45
|
|
|
|
Accuracy on Blind test: 0.73
|
|
|
|
Model_name: Extra Tree
|
|
Model func: ExtraTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01099753 0.01092768 0.01097798 0.01108003 0.01101756 0.01090622
|
|
0.00987029 0.01084471 0.01115012 0.01017547]
|
|
|
|
mean value: 0.010794758796691895
|
|
|
|
key: score_time
|
|
value: [0.00972366 0.00961947 0.00971508 0.00961971 0.00963116 0.00879359
|
|
0.00882149 0.00970793 0.00967693 0.00886703]
|
|
|
|
mean value: 0.00941760540008545
|
|
|
|
key: test_mcc
|
|
value: [ 0.05547002 0.26315789 0.40469382 -0.02932564 0.20189884 0.35104619
|
|
0.13274856 0.24633537 0.35087719 0.35104619]
|
|
|
|
mean value: 0.2327948446160859
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.52631579 0.63157895 0.7027027 0.48648649 0.59459459 0.67567568
|
|
0.56756757 0.62162162 0.67567568 0.67567568]
|
|
|
|
mean value: 0.6157894736842106
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.59090909 0.63157895 0.68571429 0.45714286 0.63414634 0.64705882
|
|
0.6 0.61111111 0.68421053 0.7 ]
|
|
|
|
mean value: 0.6241871983554382
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.52 0.63157895 0.70588235 0.47058824 0.56521739 0.6875
|
|
0.57142857 0.64705882 0.68421053 0.66666667]
|
|
|
|
mean value: 0.6150131514848503
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.68421053 0.63157895 0.66666667 0.44444444 0.72222222 0.61111111
|
|
0.63157895 0.57894737 0.68421053 0.73684211]
|
|
|
|
mean value: 0.6391812865497076
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.52631579 0.63157895 0.70175439 0.48538012 0.59795322 0.67397661
|
|
0.56578947 0.62280702 0.6754386 0.67397661]
|
|
|
|
mean value: 0.6154970760233919
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.41935484 0.46153846 0.52173913 0.2962963 0.46428571 0.47826087
|
|
0.42857143 0.44 0.52 0.53846154]
|
|
|
|
mean value: 0.45685082778631164
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.07
|
|
|
|
Accuracy on Blind test: 0.54
|
|
|
|
Model_name: Random Forest
|
|
Model func: RandomForestClassifier(n_estimators=1000, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(n_estimators=1000, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.64551973 1.61713815 1.69127131 1.62904048 1.6219089 1.61603379
|
|
1.63544536 1.61761642 1.64602733 1.63242745]
|
|
|
|
mean value: 1.6352428913116455
|
|
|
|
key: score_time
|
|
value: [0.09257317 0.09689522 0.09144974 0.09084296 0.09187818 0.09140754
|
|
0.09154296 0.0911727 0.09119177 0.09239578]
|
|
|
|
mean value: 0.09213500022888184
|
|
|
|
key: test_mcc
|
|
value: [0.43643578 0.42163702 0.62280702 0.40643275 0.24633537 0.62170355
|
|
0.29824561 0.7888597 0.56725146 0.84959079]
|
|
|
|
mean value: 0.5259299054581068
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.71052632 0.71052632 0.81081081 0.7027027 0.62162162 0.81081081
|
|
0.64864865 0.89189189 0.78378378 0.91891892]
|
|
|
|
mean value: 0.7610241820768137
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.74418605 0.7027027 0.81081081 0.7027027 0.63157895 0.8
|
|
0.64864865 0.88888889 0.78947368 0.91428571]
|
|
|
|
mean value: 0.7633278146130044
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.66666667 0.72222222 0.78947368 0.68421053 0.6 0.82352941
|
|
0.66666667 0.94117647 0.78947368 1. ]
|
|
|
|
mean value: 0.7683419332645338
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.84210526 0.68421053 0.83333333 0.72222222 0.66666667 0.77777778
|
|
0.63157895 0.84210526 0.78947368 0.84210526]
|
|
|
|
mean value: 0.763157894736842
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.71052632 0.71052632 0.81140351 0.70321637 0.62280702 0.80994152
|
|
0.64912281 0.89327485 0.78362573 0.92105263]
|
|
|
|
mean value: 0.7615497076023392
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.59259259 0.54166667 0.68181818 0.54166667 0.46153846 0.66666667
|
|
0.48 0.8 0.65217391 0.84210526]
|
|
|
|
mean value: 0.6260228412150609
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.62
|
|
|
|
Accuracy on Blind test: 0.81
|
|
|
|
Model_name: Random Forest2
|
|
Model func: RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'Z...05', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10,
|
|
oob_score=True, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.90662146 0.95453739 1.03997731 0.93022537 0.9486177 0.96615672
|
|
0.9313314 0.94530773 0.97596669 0.97793722]
|
|
|
|
mean value: 0.9576678991317749
|
|
|
|
key: score_time
|
|
value: [0.24854255 0.23675323 0.19931936 0.29718184 0.20119596 0.26618361
|
|
0.23444319 0.25496364 0.26070547 0.22386193]
|
|
|
|
mean value: 0.24231507778167724
|
|
|
|
key: test_mcc
|
|
value: [0.53300179 0.58218174 0.51319869 0.58342636 0.35484024 0.56934383
|
|
0.40643275 0.83918129 0.4633451 0.73099415]
|
|
|
|
mean value: 0.5575945928320912
|
|
|
|
key: train_mcc
|
|
value: [0.89860644 0.88700711 0.88065448 0.89278334 0.904728 0.88083868
|
|
0.89279859 0.89865612 0.88671444 0.88735534]
|
|
|
|
mean value: 0.89101425496765
|
|
|
|
key: test_accuracy
|
|
value: [0.76315789 0.78947368 0.75675676 0.78378378 0.67567568 0.78378378
|
|
0.7027027 0.91891892 0.72972973 0.86486486]
|
|
|
|
mean value: 0.7768847795163585
|
|
|
|
key: train_accuracy
|
|
value: [0.9491018 0.94311377 0.94029851 0.94626866 0.95223881 0.94029851
|
|
0.94626866 0.94925373 0.94328358 0.94328358]
|
|
|
|
mean value: 0.9453409598713022
|
|
|
|
key: test_fscore
|
|
value: [0.7804878 0.77777778 0.74285714 0.8 0.68421053 0.76470588
|
|
0.7027027 0.91891892 0.72222222 0.86486486]
|
|
|
|
mean value: 0.7758747842890409
|
|
|
|
key: train_fscore
|
|
value: [0.94985251 0.94428152 0.9408284 0.94705882 0.95294118 0.94117647
|
|
0.94674556 0.9495549 0.94362018 0.94428152]
|
|
|
|
mean value: 0.9460341066497256
|
|
|
|
key: test_precision
|
|
value: [0.72727273 0.82352941 0.76470588 0.72727273 0.65 0.8125
|
|
0.72222222 0.94444444 0.76470588 0.88888889]
|
|
|
|
mean value: 0.7825542186571598
|
|
|
|
key: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
train_precision
|
|
value: [0.93604651 0.92528736 0.93529412 0.93604651 0.94186047 0.93023256
|
|
0.93567251 0.94117647 0.93529412 0.92528736]
|
|
|
|
mean value: 0.9342197979657542
|
|
|
|
key: test_recall
|
|
value: [0.84210526 0.73684211 0.72222222 0.88888889 0.72222222 0.72222222
|
|
0.68421053 0.89473684 0.68421053 0.84210526]
|
|
|
|
mean value: 0.7739766081871344
|
|
|
|
key: train_recall
|
|
value: [0.96407186 0.96407186 0.94642857 0.95833333 0.96428571 0.95238095
|
|
0.95808383 0.95808383 0.95209581 0.96407186]
|
|
|
|
mean value: 0.9581907613344739
|
|
|
|
key: test_roc_auc
|
|
value: [0.76315789 0.78947368 0.75584795 0.78654971 0.67690058 0.78216374
|
|
0.70321637 0.91959064 0.73099415 0.86549708]
|
|
|
|
mean value: 0.7773391812865497
|
|
|
|
key: train_roc_auc
|
|
value: [0.9491018 0.94311377 0.94028015 0.94623253 0.95220274 0.94026233
|
|
0.94630382 0.94928001 0.94330981 0.94334545]
|
|
|
|
mean value: 0.9453432420872541
|
|
|
|
key: test_jcc
|
|
value: [0.64 0.63636364 0.59090909 0.66666667 0.52 0.61904762
|
|
0.54166667 0.85 0.56521739 0.76190476]
|
|
|
|
mean value: 0.6391775832862789
|
|
|
|
key: train_jcc
|
|
value: [0.90449438 0.89444444 0.88826816 0.89944134 0.91011236 0.88888889
|
|
0.8988764 0.9039548 0.89325843 0.89444444]
|
|
|
|
mean value: 0.8976183650278077
|
|
|
|
MCC on Blind test: 0.67
|
|
|
|
Accuracy on Blind test: 0.84
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01151586 0.01388288 0.0112195 0.01145315 0.01135945 0.01469564
|
|
0.01843166 0.01183629 0.0118413 0.01111317]
|
|
|
|
mean value: 0.01273488998413086
|
|
|
|
key: score_time
|
|
value: [0.01000047 0.01027012 0.01018381 0.00975347 0.01245475 0.010602
|
|
0.01544094 0.01015806 0.01061845 0.00966144]
|
|
|
|
mean value: 0.010914349555969238
|
|
|
|
key: test_mcc
|
|
value: [0.63960215 0.37686733 0.30307132 0.24633537 0.09678053 0.29824561
|
|
0.35484024 0.73099415 0.40469382 0.29766651]
|
|
|
|
mean value: 0.37490970420832104
|
|
|
|
key: train_mcc
|
|
value: [0.49102677 0.53354668 0.53432421 0.51049724 0.5351962 0.5223838
|
|
0.51643142 0.50447336 0.50512364 0.49887672]
|
|
|
|
mean value: 0.5151880034378592
|
|
|
|
key: test_accuracy
|
|
value: [0.81578947 0.68421053 0.64864865 0.62162162 0.54054054 0.64864865
|
|
0.67567568 0.86486486 0.7027027 0.64864865]
|
|
|
|
mean value: 0.6851351351351351
|
|
|
|
key: train_accuracy
|
|
value: [0.74550898 0.76646707 0.76716418 0.75522388 0.76716418 0.76119403
|
|
0.75820896 0.75223881 0.75223881 0.74925373]
|
|
|
|
mean value: 0.757466261506837
|
|
|
|
key: test_fscore
|
|
value: [0.82926829 0.64705882 0.58064516 0.63157895 0.60465116 0.64864865
|
|
0.66666667 0.86486486 0.71794872 0.68292683]
|
|
|
|
mean value: 0.6874258115058971
|
|
|
|
key: train_fscore
|
|
value: [0.74626866 0.77192982 0.76785714 0.75449102 0.77456647 0.76190476
|
|
0.75820896 0.75075075 0.74461538 0.75294118]
|
|
|
|
mean value: 0.7583534145052842
|
|
|
|
key: test_precision
|
|
value: [0.77272727 0.73333333 0.69230769 0.6 0.52 0.63157895
|
|
0.70588235 0.88888889 0.7 0.63636364]
|
|
|
|
mean value: 0.6881082123930421
|
|
|
|
key: train_precision
|
|
value: [0.74404762 0.75428571 0.76785714 0.75903614 0.75280899 0.76190476
|
|
0.75595238 0.75301205 0.76582278 0.73988439]
|
|
|
|
mean value: 0.7554611978456459
|
|
|
|
key: test_recall
|
|
value: [0.89473684 0.57894737 0.5 0.66666667 0.72222222 0.66666667
|
|
0.63157895 0.84210526 0.73684211 0.73684211]
|
|
|
|
mean value: 0.6976608187134503
|
|
|
|
key: train_recall
|
|
value: [0.74850299 0.79041916 0.76785714 0.75 0.79761905 0.76190476
|
|
0.76047904 0.74850299 0.7245509 0.76646707]
|
|
|
|
mean value: 0.7616303108069575
|
|
|
|
key: test_roc_auc
|
|
value: [0.81578947 0.68421053 0.64473684 0.62280702 0.54532164 0.64912281
|
|
0.67690058 0.86549708 0.70175439 0.64619883]
|
|
|
|
mean value: 0.685233918128655
|
|
|
|
key: train_roc_auc
|
|
value: [0.74550898 0.76646707 0.7671621 0.75523952 0.767073 0.7611919
|
|
0.75821571 0.75222769 0.7521564 0.74930496]
|
|
|
|
mean value: 0.7574547333903622
|
|
|
|
key: test_jcc
|
|
value: [0.70833333 0.47826087 0.40909091 0.46153846 0.43333333 0.48
|
|
0.5 0.76190476 0.56 0.51851852]
|
|
|
|
mean value: 0.5310980187284535
|
|
|
|
key: train_jcc
|
|
value: [0.5952381 0.62857143 0.62318841 0.60576923 0.63207547 0.61538462
|
|
0.61057692 0.60096154 0.59313725 0.60377358]
|
|
|
|
mean value: 0.6108676548804667
|
|
|
|
MCC on Blind test: 0.4
|
|
|
|
Accuracy on Blind test: 0.71
|
|
|
|
Model_name: XGBoost
|
|
Model func: XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'Z...
|
|
interaction_constraints=None, learning_rate=None,
|
|
max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan,
|
|
monotone_constraints=None, n_estimators=100,
|
|
n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None,
|
|
reg_lambda=None, scale_pos_weight=None,
|
|
subsample=None, tree_method=None,
|
|
use_label_encoder=False,
|
|
validate_parameters=None, verbosity=0))])
|
|
|
|
key: fit_time
|
|
value: [1.77180934 0.09794235 0.21260977 0.09664106 0.08850789 1.15565562
|
|
0.22728086 0.07496929 0.08025026 0.24029708]
|
|
|
|
mean value: 0.4045963525772095
|
|
|
|
key: score_time
|
|
value: [0.01116514 0.01147962 0.01104784 0.01183987 0.01121092 0.01109862
|
|
0.01103997 0.01101804 0.01100731 0.01104975]
|
|
|
|
mean value: 0.011195707321166991
|
|
|
|
key: test_mcc
|
|
value: [0.52704628 0.68421053 0.56725146 0.30384671 0.35087719 0.73020842
|
|
0.40643275 0.7888597 0.73099415 0.80369958]
|
|
|
|
mean value: 0.5893426760027083
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.76315789 0.84210526 0.78378378 0.64864865 0.67567568 0.86486486
|
|
0.7027027 0.89189189 0.86486486 0.89189189]
|
|
|
|
mean value: 0.7929587482219062
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.76923077 0.84210526 0.77777778 0.66666667 0.66666667 0.85714286
|
|
0.7027027 0.88888889 0.86486486 0.88235294]
|
|
|
|
mean value: 0.791839939827556
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.75 0.84210526 0.77777778 0.61904762 0.66666667 0.88235294
|
|
0.72222222 0.94117647 0.88888889 1. ]
|
|
|
|
mean value: 0.8090237849525775
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.78947368 0.84210526 0.77777778 0.72222222 0.66666667 0.83333333
|
|
0.68421053 0.84210526 0.84210526 0.78947368]
|
|
|
|
mean value: 0.7789473684210526
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.76315789 0.84210526 0.78362573 0.6505848 0.6754386 0.86403509
|
|
0.70321637 0.89327485 0.86549708 0.89473684]
|
|
|
|
mean value: 0.7935672514619883
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.625 0.72727273 0.63636364 0.5 0.5 0.75
|
|
0.54166667 0.8 0.76190476 0.78947368]
|
|
|
|
mean value: 0.6631681476418319
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.74
|
|
|
|
Accuracy on Blind test: 0.87
|
|
|
|
Model_name: LDA
|
|
Model func: LinearDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LinearDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.04398203 0.077389 0.08039308 0.08657169 0.07196283 0.06845689
|
|
0.0758183 0.06191897 0.07105732 0.07343435]
|
|
|
|
mean value: 0.0710984468460083
|
|
|
|
key: score_time
|
|
value: [0.02191043 0.02393103 0.0243125 0.02371383 0.02336216 0.02134395
|
|
0.02300048 0.02396846 0.01992774 0.02339339]
|
|
|
|
mean value: 0.02288639545440674
|
|
|
|
key: test_mcc
|
|
value: [0.21821789 0.15789474 0.4633451 0.13450292 0.44331728 0.52960948
|
|
0.29824561 0.62280702 0.30384671 0.62280702]
|
|
|
|
mean value: 0.3794593774830786
|
|
|
|
key: train_mcc
|
|
value: [0.80252471 0.83287305 0.83881661 0.85147038 0.82698754 0.80310162
|
|
0.83338695 0.80949336 0.81504458 0.79176052]
|
|
|
|
mean value: 0.8205459328185719
|
|
|
|
key: test_accuracy
|
|
value: [0.60526316 0.57894737 0.72972973 0.56756757 0.7027027 0.75675676
|
|
0.64864865 0.81081081 0.64864865 0.81081081]
|
|
|
|
mean value: 0.6859886201991465
|
|
|
|
key: train_accuracy
|
|
value: [0.9011976 0.91616766 0.91940299 0.92537313 0.91343284 0.90149254
|
|
0.91641791 0.90447761 0.90746269 0.89552239]
|
|
|
|
mean value: 0.9100947359013317
|
|
|
|
key: test_fscore
|
|
value: [0.65116279 0.57894737 0.73684211 0.55555556 0.74418605 0.70967742
|
|
0.64864865 0.81081081 0.62857143 0.81081081]
|
|
|
|
mean value: 0.6875212984645606
|
|
|
|
key: train_fscore
|
|
value: [0.90207715 0.91764706 0.91988131 0.9271137 0.91445428 0.90265487
|
|
0.91764706 0.90588235 0.90634441 0.8973607 ]
|
|
|
|
mean value: 0.9111062889416658
|
|
|
|
key: test_precision
|
|
value: [0.58333333 0.57894737 0.7 0.55555556 0.64 0.84615385
|
|
0.66666667 0.83333333 0.6875 0.83333333]
|
|
|
|
mean value: 0.6924823436797121
|
|
|
|
key: train_precision
|
|
value: [0.89411765 0.9017341 0.91715976 0.90857143 0.90643275 0.89473684
|
|
0.9017341 0.89017341 0.91463415 0.87931034]
|
|
|
|
mean value: 0.9008604539253295
|
|
|
|
key: test_recall
|
|
value: [0.73684211 0.57894737 0.77777778 0.55555556 0.88888889 0.61111111
|
|
0.63157895 0.78947368 0.57894737 0.78947368]
|
|
|
|
mean value: 0.6938596491228071
|
|
|
|
key: train_recall
|
|
value: [0.91017964 0.93413174 0.92261905 0.94642857 0.92261905 0.91071429
|
|
0.93413174 0.92215569 0.89820359 0.91616766]
|
|
|
|
mean value: 0.9217351012261192
|
|
|
|
key: test_roc_auc
|
|
value: [0.60526316 0.57894737 0.73099415 0.56725146 0.70760234 0.75292398
|
|
0.64912281 0.81140351 0.6505848 0.81140351]
|
|
|
|
mean value: 0.6865497076023392
|
|
|
|
key: train_roc_auc
|
|
value: [0.9011976 0.91616766 0.91939336 0.92531009 0.91340533 0.90146493
|
|
0.91647063 0.90453023 0.90743513 0.89558383]
|
|
|
|
mean value: 0.910095879669233
|
|
|
|
key: test_jcc
|
|
value: [0.48275862 0.40740741 0.58333333 0.38461538 0.59259259 0.55
|
|
0.48 0.68181818 0.45833333 0.68181818]
|
|
|
|
mean value: 0.530267703560807
|
|
|
|
key: train_jcc
|
|
value: [0.82162162 0.84782609 0.85164835 0.86413043 0.8423913 0.82258065
|
|
0.84782609 0.82795699 0.82872928 0.81382979]
|
|
|
|
mean value: 0.8368540589724052
|
|
|
|
MCC on Blind test: 0.43
|
|
|
|
Accuracy on Blind test: 0.72
|
|
|
|
Model_name: Multinomial
|
|
Model func: MultinomialNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MultinomialNB())])
|
|
|
|
key: fit_time
|
|
value: [0.02415586 0.0095706 0.00922894 0.00924563 0.01002145 0.00947571
|
|
0.00966716 0.00922132 0.00936937 0.0092535 ]
|
|
|
|
mean value: 0.010920953750610352
|
|
|
|
key: score_time
|
|
value: [0.00920177 0.00874162 0.00863242 0.00856495 0.00930166 0.00883698
|
|
0.00876164 0.00860357 0.0085578 0.00866199]
|
|
|
|
mean value: 0.008786439895629883
|
|
|
|
key: test_mcc
|
|
value: [0.48454371 0.10660036 0.35104619 0.42489158 0.44331728 0.14287993
|
|
0.29824561 0.56725146 0.30307132 0.51793973]
|
|
|
|
mean value: 0.363978718106456
|
|
|
|
key: train_mcc
|
|
value: [0.40472488 0.4409689 0.40626794 0.39555602 0.38736277 0.42342906
|
|
0.45869881 0.37576464 0.38836126 0.43013991]
|
|
|
|
mean value: 0.4111274183260968
|
|
|
|
key: test_accuracy
|
|
value: [0.73684211 0.55263158 0.67567568 0.7027027 0.7027027 0.56756757
|
|
0.64864865 0.78378378 0.64864865 0.75675676]
|
|
|
|
mean value: 0.6775960170697013
|
|
|
|
key: train_accuracy
|
|
value: [0.7005988 0.71856287 0.70149254 0.69552239 0.69253731 0.71044776
|
|
0.72835821 0.68656716 0.69253731 0.71343284]
|
|
|
|
mean value: 0.7040057199034766
|
|
|
|
key: test_fscore
|
|
value: [0.76190476 0.58536585 0.64705882 0.73170732 0.74418605 0.6
|
|
0.64864865 0.78947368 0.69767442 0.7804878 ]
|
|
|
|
mean value: 0.6986507359019384
|
|
|
|
key: train_fscore
|
|
value: [0.71910112 0.73595506 0.72067039 0.71823204 0.70985915 0.72676056
|
|
0.73925501 0.70254958 0.70985915 0.72881356]
|
|
|
|
mean value: 0.7211055636994568
|
|
|
|
key: test_precision
|
|
value: [0.69565217 0.54545455 0.6875 0.65217391 0.64 0.54545455
|
|
0.66666667 0.78947368 0.625 0.72727273]
|
|
|
|
mean value: 0.6574648256015533
|
|
|
|
key: train_precision
|
|
value: [0.67724868 0.69312169 0.67894737 0.67010309 0.67379679 0.68983957
|
|
0.70879121 0.66666667 0.67021277 0.68983957]
|
|
|
|
mean value: 0.6818567408819127
|
|
|
|
key: test_recall
|
|
value: [0.84210526 0.63157895 0.61111111 0.83333333 0.88888889 0.66666667
|
|
0.63157895 0.78947368 0.78947368 0.84210526]
|
|
|
|
mean value: 0.7526315789473684
|
|
|
|
key: train_recall
|
|
value: [0.76646707 0.78443114 0.76785714 0.77380952 0.75 0.76785714
|
|
0.77245509 0.74251497 0.75449102 0.77245509]
|
|
|
|
mean value: 0.7652338180781295
|
|
|
|
key: test_roc_auc
|
|
value: [0.73684211 0.55263158 0.67397661 0.70614035 0.70760234 0.57017544
|
|
0.64912281 0.78362573 0.64473684 0.75438596]
|
|
|
|
mean value: 0.6779239766081872
|
|
|
|
key: train_roc_auc
|
|
value: [0.7005988 0.71856287 0.70129384 0.695288 0.69236527 0.71027588
|
|
0.72848945 0.68673368 0.6927217 0.7136085 ]
|
|
|
|
mean value: 0.7039937981180496
|
|
|
|
key: test_jcc
|
|
value: [0.61538462 0.4137931 0.47826087 0.57692308 0.59259259 0.42857143
|
|
0.48 0.65217391 0.53571429 0.64 ]
|
|
|
|
mean value: 0.541341388524297
|
|
|
|
key: train_jcc
|
|
value: [0.56140351 0.58222222 0.56331878 0.56034483 0.55021834 0.57079646
|
|
0.58636364 0.54148472 0.55021834 0.57333333]
|
|
|
|
mean value: 0.5639704163126809
|
|
|
|
MCC on Blind test: 0.38
|
|
|
|
Accuracy on Blind test: 0.69
|
|
|
|
Model_name: Passive Aggresive
|
|
Model func: PassiveAggressiveClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
PassiveAggressiveClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.0112946 0.01887894 0.01714182 0.0169425 0.02012491 0.01688933
|
|
0.02002192 0.01831722 0.01710081 0.01792026]
|
|
|
|
mean value: 0.01746323108673096
|
|
|
|
key: score_time
|
|
value: [0.00931668 0.01202917 0.01447034 0.01252007 0.01280284 0.0126853
|
|
0.01274681 0.01239681 0.01215363 0.01202393]
|
|
|
|
mean value: 0.012314558029174805
|
|
|
|
key: test_mcc
|
|
value: [0.63245553 0.20365327 0.38474188 0.38474188 0.36315314 0.45644817
|
|
0.38173594 0.69356297 0.42489158 0.38474188]
|
|
|
|
mean value: 0.4310126259482233
|
|
|
|
key: train_mcc
|
|
value: [0.62634471 0.52841803 0.4151561 0.56639845 0.76250995 0.58253038
|
|
0.61958208 0.60448535 0.68858317 0.27232219]
|
|
|
|
mean value: 0.566633042078675
|
|
|
|
key: test_accuracy
|
|
value: [0.81578947 0.57894737 0.62162162 0.62162162 0.67567568 0.7027027
|
|
0.67567568 0.83783784 0.7027027 0.62162162]
|
|
|
|
mean value: 0.6854196301564722
|
|
|
|
key: train_accuracy
|
|
value: [0.80538922 0.7245509 0.64776119 0.74328358 0.88059701 0.7761194
|
|
0.78208955 0.7880597 0.83880597 0.57014925]
|
|
|
|
mean value: 0.755680579140227
|
|
|
|
key: test_fscore
|
|
value: [0.81081081 0.68 0.72 0.72 0.7 0.59259259
|
|
0.73913043 0.82352941 0.66666667 0.41666667]
|
|
|
|
mean value: 0.6869396583284051
|
|
|
|
key: train_fscore
|
|
value: [0.78114478 0.78199052 0.74008811 0.79620853 0.88439306 0.73498233
|
|
0.81885856 0.74911661 0.82236842 0.24210526]
|
|
|
|
mean value: 0.7351256187522069
|
|
|
|
key: test_precision
|
|
value: [0.83333333 0.5483871 0.5625 0.5625 0.63636364 0.88888889
|
|
0.62962963 0.93333333 0.78571429 1. ]
|
|
|
|
mean value: 0.7380650204037301
|
|
|
|
key: train_precision
|
|
value: [0.89230769 0.64705882 0.58741259 0.66141732 0.85955056 0.90434783
|
|
0.69915254 0.9137931 0.91240876 1. ]
|
|
|
|
mean value: 0.8077449218914291
|
|
|
|
key: test_recall
|
|
value: [0.78947368 0.89473684 1. 1. 0.77777778 0.44444444
|
|
0.89473684 0.73684211 0.57894737 0.26315789]
|
|
|
|
mean value: 0.7380116959064328
|
|
|
|
key: train_recall
|
|
value: [0.69461078 0.98802395 1. 1. 0.91071429 0.61904762
|
|
0.98802395 0.63473054 0.74850299 0.13772455]
|
|
|
|
mean value: 0.7721378671228971
|
|
|
|
key: test_roc_auc
|
|
value: [0.81578947 0.57894737 0.63157895 0.63157895 0.67836257 0.69590643
|
|
0.66959064 0.84064327 0.70614035 0.63157895]
|
|
|
|
mean value: 0.6880116959064327
|
|
|
|
key: train_roc_auc
|
|
value: [0.80538922 0.7245509 0.64670659 0.74251497 0.88050684 0.77658968
|
|
0.78270245 0.78760336 0.83853721 0.56886228]
|
|
|
|
mean value: 0.7553963501568293
|
|
|
|
key: test_jcc
|
|
value: [0.68181818 0.51515152 0.5625 0.5625 0.53846154 0.42105263
|
|
0.5862069 0.7 0.5 0.26315789]
|
|
|
|
mean value: 0.5330848658298749
|
|
|
|
key: train_jcc
|
|
value: [0.64088398 0.64202335 0.58741259 0.66141732 0.79274611 0.58100559
|
|
0.69327731 0.59887006 0.69832402 0.13772455]
|
|
|
|
mean value: 0.6033684875699221
|
|
|
|
MCC on Blind test: 0.46
|
|
|
|
Accuracy on Blind test: 0.72
|
|
|
|
Model_name: Stochastic GDescent
|
|
Model func: SGDClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SGDClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01800632 0.02126884 0.01854968 0.02176571 0.0162394 0.02095675
|
|
0.01644278 0.0201745 0.01830602 0.02004957]
|
|
|
|
mean value: 0.019175958633422852
|
|
|
|
key: score_time
|
|
value: [0.01222658 0.01200747 0.01216602 0.01212478 0.01207376 0.01186252
|
|
0.01209474 0.01432395 0.01220989 0.01204729]
|
|
|
|
mean value: 0.01231369972229004
|
|
|
|
key: test_mcc
|
|
value: [0.54554473 0.35856858 0.45906433 0.47328975 0.63309535 0.45644817
|
|
0.40780312 0.74044197 0.48078072 0.64788432]
|
|
|
|
mean value: 0.5202921041727678
|
|
|
|
key: train_mcc
|
|
value: [0.73912877 0.67437017 0.72048596 0.79707717 0.65355222 0.58846798
|
|
0.66998181 0.69183424 0.53048727 0.65522829]
|
|
|
|
mean value: 0.6720613880139767
|
|
|
|
key: test_accuracy
|
|
value: [0.76315789 0.65789474 0.72972973 0.72972973 0.78378378 0.7027027
|
|
0.7027027 0.86486486 0.72972973 0.81081081]
|
|
|
|
mean value: 0.7475106685633002
|
|
|
|
key: train_accuracy
|
|
value: [0.86826347 0.82634731 0.85970149 0.89850746 0.81791045 0.77313433
|
|
0.82985075 0.83880597 0.72238806 0.82089552]
|
|
|
|
mean value: 0.825580480829386
|
|
|
|
key: test_fscore
|
|
value: [0.79069767 0.55172414 0.72222222 0.75 0.81818182 0.59259259
|
|
0.73170732 0.85714286 0.77272727 0.78787879]
|
|
|
|
mean value: 0.7374874680168361
|
|
|
|
key: train_fscore
|
|
value: [0.87356322 0.80136986 0.85626911 0.89820359 0.83733333 0.72262774
|
|
0.84297521 0.82 0.78117647 0.8 ]
|
|
|
|
mean value: 0.8233518535128138
|
|
|
|
key: test_precision
|
|
value: [0.70833333 0.8 0.72222222 0.68181818 0.69230769 0.88888889
|
|
0.68181818 0.9375 0.68 0.92857143]
|
|
|
|
mean value: 0.7721459928959928
|
|
|
|
key: train_precision
|
|
value: [0.83977901 0.936 0.88050314 0.90361446 0.75845411 0.93396226
|
|
0.78061224 0.92481203 0.64341085 0.90225564]
|
|
|
|
mean value: 0.8503403745225482
|
|
|
|
key: test_recall
|
|
value: [0.89473684 0.42105263 0.72222222 0.83333333 1. 0.44444444
|
|
0.78947368 0.78947368 0.89473684 0.68421053]
|
|
|
|
mean value: 0.7473684210526316
|
|
|
|
key: train_recall
|
|
value: [0.91017964 0.7005988 0.83333333 0.89285714 0.93452381 0.58928571
|
|
0.91616766 0.73652695 0.99401198 0.71856287]
|
|
|
|
mean value: 0.8226047904191617
|
|
|
|
key: test_roc_auc
|
|
value: [0.76315789 0.65789474 0.72953216 0.73245614 0.78947368 0.69590643
|
|
0.7002924 0.86695906 0.7251462 0.81432749]
|
|
|
|
mean value: 0.7475146198830409
|
|
|
|
key: train_roc_auc
|
|
value: [0.86826347 0.82634731 0.85978044 0.89852438 0.81756131 0.77368477
|
|
0.83010764 0.83850157 0.72319646 0.82059096]
|
|
|
|
mean value: 0.8256558311947534
|
|
|
|
key: test_jcc
|
|
value: [0.65384615 0.38095238 0.56521739 0.6 0.69230769 0.42105263
|
|
0.57692308 0.75 0.62962963 0.65 ]
|
|
|
|
mean value: 0.5919928956542229
|
|
|
|
key: train_jcc
|
|
value: [0.7755102 0.66857143 0.7486631 0.81521739 0.72018349 0.56571429
|
|
0.72857143 0.69491525 0.64092664 0.66666667]
|
|
|
|
mean value: 0.7024939887916529
|
|
|
|
MCC on Blind test: 0.53
|
|
|
|
Accuracy on Blind test: 0.75
|
|
|
|
Model_name: AdaBoost Classifier
|
|
Model func: AdaBoostClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', AdaBoostClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.18781757 0.15824699 0.16303682 0.16295147 0.18394685 0.15199327
|
|
0.15818834 0.16116714 0.15887046 0.15602541]
|
|
|
|
mean value: 0.16422443389892577
|
|
|
|
key: score_time
|
|
value: [0.01582694 0.01701689 0.01685357 0.0169425 0.01502705 0.01557589
|
|
0.01670194 0.01666617 0.01580858 0.01746845]
|
|
|
|
mean value: 0.016388797760009767
|
|
|
|
key: test_mcc
|
|
value: [0.47368421 0.52704628 0.45906433 0.36315314 0.35087719 0.73821295
|
|
0.40780312 0.73020842 0.56725146 0.78362573]
|
|
|
|
mean value: 0.5400926829819251
|
|
|
|
key: train_mcc
|
|
value: [0.94626048 0.97021644 0.95250666 0.94641713 0.97016575 0.93434559
|
|
0.97618963 0.94036714 0.94029798 0.94628409]
|
|
|
|
mean value: 0.9523050878154723
|
|
|
|
key: test_accuracy
|
|
value: [0.73684211 0.76315789 0.72972973 0.67567568 0.67567568 0.86486486
|
|
0.7027027 0.86486486 0.78378378 0.89189189]
|
|
|
|
mean value: 0.768918918918919
|
|
|
|
key: train_accuracy
|
|
value: [0.97305389 0.98502994 0.9761194 0.97313433 0.98507463 0.96716418
|
|
0.9880597 0.97014925 0.97014925 0.97313433]
|
|
|
|
mean value: 0.9761068906962195
|
|
|
|
key: test_fscore
|
|
value: [0.73684211 0.75675676 0.72222222 0.7 0.66666667 0.84848485
|
|
0.73170732 0.87179487 0.78947368 0.89473684]
|
|
|
|
mean value: 0.7718685314577484
|
|
|
|
key: train_fscore
|
|
value: [0.97280967 0.9851632 0.97647059 0.97345133 0.9851632 0.96716418
|
|
0.98809524 0.9702381 0.97005988 0.97297297]
|
|
|
|
mean value: 0.9761588358488492
|
|
|
|
key: test_precision
|
|
value: [0.73684211 0.77777778 0.72222222 0.63636364 0.66666667 0.93333333
|
|
0.68181818 0.85 0.78947368 0.89473684]
|
|
|
|
mean value: 0.7689234449760766
|
|
|
|
key: train_precision
|
|
value: [0.98170732 0.97647059 0.96511628 0.96491228 0.98224852 0.97005988
|
|
0.98224852 0.96449704 0.97005988 0.97590361]
|
|
|
|
mean value: 0.9733223922857097
|
|
|
|
key: test_recall
|
|
value: [0.73684211 0.73684211 0.72222222 0.77777778 0.66666667 0.77777778
|
|
0.78947368 0.89473684 0.78947368 0.89473684]
|
|
|
|
mean value: 0.7786549707602339
|
|
|
|
key: train_recall
|
|
value: [0.96407186 0.99401198 0.98809524 0.98214286 0.98809524 0.96428571
|
|
0.99401198 0.9760479 0.97005988 0.97005988]
|
|
|
|
mean value: 0.979088252067294
|
|
|
|
key: test_roc_auc
|
|
value: [0.73684211 0.76315789 0.72953216 0.67836257 0.6754386 0.8625731
|
|
0.7002924 0.86403509 0.78362573 0.89181287]
|
|
|
|
mean value: 0.7685672514619883
|
|
|
|
key: train_roc_auc
|
|
value: [0.97305389 0.98502994 0.97608355 0.97310736 0.98506558 0.9671728
|
|
0.98807742 0.97016681 0.97014899 0.97312518]
|
|
|
|
mean value: 0.9761031508411748
|
|
|
|
key: test_jcc
|
|
value: [0.58333333 0.60869565 0.56521739 0.53846154 0.5 0.73684211
|
|
0.57692308 0.77272727 0.65217391 0.80952381]
|
|
|
|
mean value: 0.6343898092753928
|
|
|
|
key: train_jcc
|
|
value: [0.94705882 0.97076023 0.95402299 0.94827586 0.97076023 0.93641618
|
|
0.97647059 0.94219653 0.94186047 0.94736842]
|
|
|
|
mean value: 0.9535190333107592
|
|
|
|
MCC on Blind test: 0.64
|
|
|
|
Accuracy on Blind test: 0.82
|
|
|
|
Model_name: Bagging Classifier
|
|
Model func: BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
BaggingClassifier(n_jobs=10, oob_score=True,
|
|
random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.04647446 0.05159092 0.06955075 0.0711534 0.07107329 0.07597518
|
|
0.0638113 0.08366609 0.07446146 0.08396101]
|
|
|
|
mean value: 0.06917178630828857
|
|
|
|
key: score_time
|
|
value: [0.01956868 0.02185607 0.02191305 0.03379774 0.02967048 0.02284503
|
|
0.0289433 0.033741 0.02987695 0.02643299]
|
|
|
|
mean value: 0.02686452865600586
|
|
|
|
key: test_mcc
|
|
value: [0.57894737 0.58218174 0.67849265 0.40643275 0.29824561 0.62807634
|
|
0.40469382 0.68035483 0.68035483 0.67434178]
|
|
|
|
mean value: 0.5612121720676296
|
|
|
|
key: train_mcc
|
|
value: [0.97611791 0.97611791 0.9880596 0.95251339 0.95251339 0.92862747
|
|
0.9470891 0.95836009 0.96424625 0.94641713]
|
|
|
|
mean value: 0.9590062241813834
|
|
|
|
key: test_accuracy
|
|
value: [0.78947368 0.78947368 0.83783784 0.7027027 0.64864865 0.81081081
|
|
0.7027027 0.83783784 0.83783784 0.81081081]
|
|
|
|
mean value: 0.7768136557610242
|
|
|
|
key: train_accuracy
|
|
value: [0.98802395 0.98802395 0.99402985 0.9761194 0.9761194 0.9641791
|
|
0.97313433 0.97910448 0.98208955 0.97313433]
|
|
|
|
mean value: 0.9793958351952811
|
|
|
|
key: test_fscore
|
|
value: [0.78947368 0.77777778 0.82352941 0.7027027 0.64864865 0.78787879
|
|
0.71794872 0.83333333 0.83333333 0.77419355]
|
|
|
|
mean value: 0.768881994598563
|
|
|
|
key: train_fscore
|
|
value: [0.98795181 0.98795181 0.99404762 0.97590361 0.97590361 0.96385542
|
|
0.97247706 0.97885196 0.98192771 0.97280967]
|
|
|
|
mean value: 0.9791680290591356
|
|
|
|
key: test_precision
|
|
value: [0.78947368 0.82352941 0.875 0.68421053 0.63157895 0.86666667
|
|
0.7 0.88235294 0.88235294 1. ]
|
|
|
|
mean value: 0.8135165118679051
|
|
|
|
key: train_precision
|
|
value: [0.99393939 0.99393939 0.99404762 0.98780488 0.98780488 0.97560976
|
|
0.99375 0.98780488 0.98787879 0.98170732]
|
|
|
|
mean value: 0.9884286902122268
|
|
|
|
key: test_recall
|
|
value: [0.78947368 0.73684211 0.77777778 0.72222222 0.66666667 0.72222222
|
|
0.73684211 0.78947368 0.78947368 0.63157895]
|
|
|
|
mean value: 0.7362573099415205
|
|
|
|
key: train_recall
|
|
value: [0.98203593 0.98203593 0.99404762 0.96428571 0.96428571 0.95238095
|
|
0.95209581 0.97005988 0.9760479 0.96407186]
|
|
|
|
mean value: 0.9701347305389222
|
|
|
|
key: test_roc_auc
|
|
value: [0.78947368 0.78947368 0.83625731 0.70321637 0.64912281 0.80847953
|
|
0.70175439 0.83918129 0.83918129 0.81578947]
|
|
|
|
mean value: 0.7771929824561403
|
|
|
|
key: train_roc_auc
|
|
value: [0.98802395 0.98802395 0.9940298 0.97615483 0.97615483 0.96421443
|
|
0.97307171 0.97907756 0.98207157 0.97310736]
|
|
|
|
mean value: 0.979392999714856
|
|
|
|
key: test_jcc
|
|
value: [0.65217391 0.63636364 0.7 0.54166667 0.48 0.65
|
|
0.56 0.71428571 0.71428571 0.63157895]
|
|
|
|
mean value: 0.628035459201363
|
|
|
|
key: train_jcc
|
|
value: [0.97619048 0.97619048 0.98816568 0.95294118 0.95294118 0.93023256
|
|
0.94642857 0.95857988 0.96449704 0.94705882]
|
|
|
|
mean value: 0.9593225861969943
|
|
|
|
MCC on Blind test: 0.56
|
|
|
|
Accuracy on Blind test: 0.78
|
|
|
|
Model_name: Gaussian Process
|
|
Model func: GaussianProcessClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianProcessClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.05166841 0.0913372 0.08585739 0.06117463 0.09166098 0.0636065
|
|
0.0878315 0.12063551 0.14016867 0.09943056]
|
|
|
|
mean value: 0.08933713436126708
|
|
|
|
key: score_time
|
|
value: [0.01588869 0.0228138 0.01684928 0.02300811 0.01367974 0.02157831
|
|
0.01367021 0.02262735 0.0221386 0.01382113]
|
|
|
|
mean value: 0.018607521057128908
|
|
|
|
key: test_mcc
|
|
value: [0.10910895 0.26315789 0.29618896 0.42489158 0.18768409 0.51793973
|
|
0.29824561 0.35087719 0.40469382 0.6754386 ]
|
|
|
|
mean value: 0.35282264220800774
|
|
|
|
key: train_mcc
|
|
value: [0.98802395 0.9940298 0.9880596 0.9880596 0.9880596 0.98210658
|
|
0.98210658 0.98210721 0.98210721 0.98813046]
|
|
|
|
mean value: 0.9862790575696734
|
|
|
|
key: test_accuracy
|
|
value: [0.55263158 0.63157895 0.64864865 0.7027027 0.59459459 0.75675676
|
|
0.64864865 0.67567568 0.7027027 0.83783784]
|
|
|
|
mean value: 0.6751778093883357
|
|
|
|
key: train_accuracy
|
|
value: [0.99401198 0.99700599 0.99402985 0.99402985 0.99402985 0.99104478
|
|
0.99104478 0.99104478 0.99104478 0.99402985]
|
|
|
|
mean value: 0.9931316471534544
|
|
|
|
key: test_fscore
|
|
value: [0.60465116 0.63157895 0.62857143 0.73170732 0.57142857 0.72727273
|
|
0.64864865 0.68421053 0.71794872 0.84210526]
|
|
|
|
mean value: 0.6788123310576067
|
|
|
|
key: train_fscore
|
|
value: [0.99401198 0.99701493 0.99404762 0.99404762 0.99404762 0.99109792
|
|
0.99099099 0.99104478 0.99104478 0.99404762]
|
|
|
|
mean value: 0.9931395843689976
|
|
|
|
key: test_precision
|
|
value: [0.54166667 0.63157895 0.64705882 0.65217391 0.58823529 0.8
|
|
0.66666667 0.68421053 0.7 0.84210526]
|
|
|
|
mean value: 0.6753696100865976
|
|
|
|
key: train_precision
|
|
value: [0.99401198 0.99404762 0.99404762 0.99404762 0.99404762 0.98816568
|
|
0.9939759 0.98809524 0.98809524 0.98816568]
|
|
|
|
mean value: 0.991670019299006
|
|
|
|
key: test_recall
|
|
value: [0.68421053 0.63157895 0.61111111 0.83333333 0.55555556 0.66666667
|
|
0.63157895 0.68421053 0.73684211 0.84210526]
|
|
|
|
mean value: 0.6877192982456141
|
|
|
|
key: train_recall
|
|
value: [0.99401198 1. 0.99404762 0.99404762 0.99404762 0.99404762
|
|
0.98802395 0.99401198 0.99401198 1. ]
|
|
|
|
mean value: 0.9946250356429998
|
|
|
|
key: test_roc_auc
|
|
value: [0.55263158 0.63157895 0.64766082 0.70614035 0.59356725 0.75438596
|
|
0.64912281 0.6754386 0.70175439 0.8377193 ]
|
|
|
|
mean value: 0.675
|
|
|
|
key: train_roc_auc
|
|
value: [0.99401198 0.99700599 0.9940298 0.9940298 0.9940298 0.99103579
|
|
0.99103579 0.99105361 0.99105361 0.99404762]
|
|
|
|
mean value: 0.993133376104933
|
|
|
|
key: test_jcc
|
|
value: [0.43333333 0.46153846 0.45833333 0.57692308 0.4 0.57142857
|
|
0.48 0.52 0.56 0.72727273]
|
|
|
|
mean value: 0.5188829503829504
|
|
|
|
key: train_jcc
|
|
value: [0.98809524 0.99404762 0.98816568 0.98816568 0.98816568 0.98235294
|
|
0.98214286 0.98224852 0.98224852 0.98816568]
|
|
|
|
mean value: 0.9863798418775794
|
|
|
|
MCC on Blind test: 0.33
|
|
|
|
Accuracy on Blind test: 0.67
|
|
|
|
Model_name: Gradient Boosting
|
|
Model func: GradientBoostingClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GradientBoostingClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.58621264 0.5736165 0.56978011 0.56975627 0.59718847 0.57798433
|
|
0.61189032 0.58795667 0.56403637 0.56556082]
|
|
|
|
mean value: 0.5803982496261597
|
|
|
|
key: score_time
|
|
value: [0.01032114 0.00955176 0.0095222 0.00964332 0.00958204 0.00942588
|
|
0.00940585 0.00963306 0.00961947 0.00969934]
|
|
|
|
mean value: 0.00964040756225586
|
|
|
|
key: test_mcc
|
|
value: [0.68803296 0.68803296 0.62280702 0.35484024 0.41299552 0.78362573
|
|
0.40469382 0.78362573 0.73020842 0.69356297]
|
|
|
|
mean value: 0.6162425371931172
|
|
|
|
key: train_mcc
|
|
value: [1. 0.9940298 1. 1. 1. 1. 1.
|
|
1. 1. 1. ]
|
|
|
|
mean value: 0.9994029797388004
|
|
|
|
key: test_accuracy
|
|
value: [0.84210526 0.84210526 0.81081081 0.67567568 0.7027027 0.89189189
|
|
0.7027027 0.89189189 0.86486486 0.83783784]
|
|
|
|
mean value: 0.8062588904694168
|
|
|
|
key: train_accuracy
|
|
value: [1. 0.99700599 1. 1. 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9997005988023953
|
|
|
|
key: test_fscore
|
|
value: [0.83333333 0.83333333 0.81081081 0.68421053 0.71794872 0.88888889
|
|
0.71794872 0.89473684 0.87179487 0.82352941]
|
|
|
|
mean value: 0.8076535454244432
|
|
|
|
key: train_fscore
|
|
value: [1. 0.99701493 1. 1. 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9997014925373134
|
|
|
|
key: test_precision
|
|
value: [0.88235294 0.88235294 0.78947368 0.65 0.66666667 0.88888889
|
|
0.7 0.89473684 0.85 0.93333333]
|
|
|
|
mean value: 0.813780529755762
|
|
|
|
key: train_precision
|
|
value: [1. 0.99404762 1. 1. 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9994047619047619
|
|
|
|
key: test_recall
|
|
value: [0.78947368 0.78947368 0.83333333 0.72222222 0.77777778 0.88888889
|
|
0.73684211 0.89473684 0.89473684 0.73684211]
|
|
|
|
mean value: 0.8064327485380117
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.84210526 0.84210526 0.81140351 0.67690058 0.70467836 0.89181287
|
|
0.70175439 0.89181287 0.86403509 0.84064327]
|
|
|
|
mean value: 0.8067251461988304
|
|
|
|
key: train_roc_auc
|
|
value: [1. 0.99700599 1. 1. 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9997005988023953
|
|
|
|
key: test_jcc
|
|
value: [0.71428571 0.71428571 0.68181818 0.52 0.56 0.8
|
|
0.56 0.80952381 0.77272727 0.7 ]
|
|
|
|
mean value: 0.6832640692640692
|
|
|
|
key: train_jcc
|
|
value: [1. 0.99404762 1. 1. 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9994047619047619
|
|
|
|
MCC on Blind test: 0.7
|
|
|
|
Accuracy on Blind test: 0.85
|
|
|
|
Model_name: QDA
|
|
Model func: QuadraticDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', QuadraticDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.02552152 0.02762151 0.02876067 0.03926182 0.03176737 0.02885699
|
|
0.03105807 0.0261817 0.02655768 0.0263772 ]
|
|
|
|
mean value: 0.029196453094482423
|
|
|
|
key: score_time
|
|
value: [0.01214433 0.01285648 0.01266003 0.02276683 0.01387072 0.01373124
|
|
0.0125742 0.0156312 0.0155077 0.01567101]
|
|
|
|
mean value: 0.01474137306213379
|
|
|
|
key: test_mcc
|
|
value: [0.54554473 0.15877684 0.36315314 0.37654316 0.09040246 0.4633451
|
|
0.45906433 0.29824561 0.29618896 0.45906433]
|
|
|
|
mean value: 0.3510328655219859
|
|
|
|
key: train_mcc
|
|
value: [0.80837203 0.88430752 0.80670614 0.80878922 0.71558156 0.71558156
|
|
0.77044833 0.89930469 0.87629255 0.83962036]
|
|
|
|
mean value: 0.8125003960240069
|
|
|
|
key: test_accuracy
|
|
value: [0.76315789 0.57894737 0.67567568 0.67567568 0.54054054 0.72972973
|
|
0.72972973 0.64864865 0.64864865 0.72972973]
|
|
|
|
mean value: 0.6720483641536273
|
|
|
|
key: train_accuracy
|
|
value: [0.89520958 0.94011976 0.89850746 0.89552239 0.83880597 0.83880597
|
|
0.87761194 0.94925373 0.93432836 0.91343284]
|
|
|
|
mean value: 0.8981597998033783
|
|
|
|
key: test_fscore
|
|
value: [0.79069767 0.6 0.7 0.71428571 0.58536585 0.73684211
|
|
0.73684211 0.64864865 0.66666667 0.73684211]
|
|
|
|
mean value: 0.6916190873467645
|
|
|
|
key: train_fscore
|
|
value: [0.90514905 0.94285714 0.90607735 0.90566038 0.86153846 0.86153846
|
|
0.88828338 0.95014663 0.93820225 0.92011019]
|
|
|
|
mean value: 0.9079563289190423
|
|
|
|
key: test_precision
|
|
value: [0.70833333 0.57142857 0.63636364 0.625 0.52173913 0.7
|
|
0.73684211 0.66666667 0.65 0.73684211]
|
|
|
|
mean value: 0.6553215548753306
|
|
|
|
key: train_precision
|
|
value: [0.82673267 0.90163934 0.84536082 0.82758621 0.75675676 0.75675676
|
|
0.815 0.93103448 0.88359788 0.85204082]
|
|
|
|
mean value: 0.839650574536499
|
|
|
|
key: test_recall
|
|
value: [0.89473684 0.63157895 0.77777778 0.83333333 0.66666667 0.77777778
|
|
0.73684211 0.63157895 0.68421053 0.73684211]
|
|
|
|
mean value: 0.7371345029239766
|
|
|
|
key: train_recall
|
|
value: [1. 0.98802395 0.97619048 1. 1. 1.
|
|
0.9760479 0.97005988 1. 1. ]
|
|
|
|
mean value: 0.9910322212717422
|
|
|
|
key: test_roc_auc
|
|
value: [0.76315789 0.57894737 0.67836257 0.67982456 0.54385965 0.73099415
|
|
0.72953216 0.64912281 0.64766082 0.72953216]
|
|
|
|
mean value: 0.6730994152046783
|
|
|
|
key: train_roc_auc
|
|
value: [0.89520958 0.94011976 0.89827488 0.89520958 0.83832335 0.83832335
|
|
0.8779049 0.94931565 0.93452381 0.91369048]
|
|
|
|
mean value: 0.8980895352152838
|
|
|
|
key: test_jcc
|
|
value: [0.65384615 0.42857143 0.53846154 0.55555556 0.4137931 0.58333333
|
|
0.58333333 0.48 0.5 0.58333333]
|
|
|
|
mean value: 0.5320227779882952
|
|
|
|
key: train_jcc
|
|
value: [0.82673267 0.89189189 0.82828283 0.82758621 0.75675676 0.75675676
|
|
0.79901961 0.90502793 0.88359788 0.85204082]
|
|
|
|
mean value: 0.8327693354580558
|
|
|
|
MCC on Blind test: 0.22
|
|
|
|
Accuracy on Blind test: 0.62
|
|
|
|
Model_name: Ridge Classifier
|
|
Model func: RidgeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02349615 0.03346586 0.03672242 0.03666854 0.03695321 0.03859115
|
|
0.03790188 0.03890991 0.03675103 0.0367806 ]
|
|
|
|
mean value: 0.03562407493591309
|
|
|
|
key: score_time
|
|
value: [0.01728749 0.01470208 0.02226615 0.02306604 0.01886368 0.02349687
|
|
0.02382088 0.02288771 0.02065992 0.02411103]
|
|
|
|
mean value: 0.021116185188293456
|
|
|
|
key: test_mcc
|
|
value: [0.38829014 0.31622777 0.51461988 0.35087719 0.31339521 0.62807634
|
|
0.24269006 0.73099415 0.47328975 0.78362573]
|
|
|
|
mean value: 0.47420862187492174
|
|
|
|
key: train_mcc
|
|
value: [0.73673831 0.7969217 0.76717993 0.79706271 0.78507271 0.76139205
|
|
0.77946949 0.76729762 0.80312267 0.76717993]
|
|
|
|
mean value: 0.7761437103910086
|
|
|
|
key: test_accuracy
|
|
value: [0.68421053 0.65789474 0.75675676 0.67567568 0.64864865 0.81081081
|
|
0.62162162 0.86486486 0.72972973 0.89189189]
|
|
|
|
mean value: 0.7342105263157895
|
|
|
|
key: train_accuracy
|
|
value: [0.86826347 0.89820359 0.88358209 0.89850746 0.89253731 0.88059701
|
|
0.88955224 0.88358209 0.90149254 0.88358209]
|
|
|
|
mean value: 0.8879899901689159
|
|
|
|
key: test_fscore
|
|
value: [0.72727273 0.66666667 0.75675676 0.66666667 0.68292683 0.78787879
|
|
0.63157895 0.86486486 0.70588235 0.89473684]
|
|
|
|
mean value: 0.7385231441789624
|
|
|
|
key: train_fscore
|
|
value: /home/tanu/git/LSHTM_analysis/scripts/ml/./pnca_cd_8020.py:136: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
smnc_CT.sort_values(by = ['test_mcc'], ascending = False, inplace = True)
|
|
/home/tanu/git/LSHTM_analysis/scripts/ml/./pnca_cd_8020.py:139: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
smnc_BT.sort_values(by = ['bts_mcc'], ascending = False, inplace = True)
|
|
[0.86982249 0.9 0.88358209 0.89940828 0.89285714 0.88235294
|
|
0.89085546 0.884273 0.90207715 0.88358209]
|
|
|
|
mean value: 0.8888810637963952
|
|
|
|
key: test_precision
|
|
value: [0.64 0.65 0.73684211 0.66666667 0.60869565 0.86666667
|
|
0.63157895 0.88888889 0.8 0.89473684]
|
|
|
|
mean value: 0.7384075769132977
|
|
|
|
key: train_precision
|
|
value: [0.85964912 0.88439306 0.88622754 0.89411765 0.89285714 0.87209302
|
|
0.87790698 0.87647059 0.89411765 0.88095238]
|
|
|
|
mean value: 0.8818785137463477
|
|
|
|
key: test_recall
|
|
value: [0.84210526 0.68421053 0.77777778 0.66666667 0.77777778 0.72222222
|
|
0.63157895 0.84210526 0.63157895 0.89473684]
|
|
|
|
mean value: 0.7470760233918129
|
|
|
|
key: train_recall
|
|
value: [0.88023952 0.91616766 0.88095238 0.9047619 0.89285714 0.89285714
|
|
0.90419162 0.89221557 0.91017964 0.88622754]
|
|
|
|
mean value: 0.8960650128314799
|
|
|
|
key: test_roc_auc
|
|
value: [0.68421053 0.65789474 0.75730994 0.6754386 0.65204678 0.80847953
|
|
0.62134503 0.86549708 0.73245614 0.89181287]
|
|
|
|
mean value: 0.7346491228070176
|
|
|
|
key: train_roc_auc
|
|
value: [0.86826347 0.89820359 0.88358996 0.89848874 0.89253636 0.88056031
|
|
0.88959581 0.88360778 0.90151839 0.88358996]
|
|
|
|
mean value: 0.8879954376960365
|
|
|
|
key: test_jcc
|
|
value: [0.57142857 0.5 0.60869565 0.5 0.51851852 0.65
|
|
0.46153846 0.76190476 0.54545455 0.80952381]
|
|
|
|
mean value: 0.5927064320542581
|
|
|
|
key: train_jcc
|
|
value: [0.76963351 0.81818182 0.79144385 0.8172043 0.80645161 0.78947368
|
|
0.80319149 0.79255319 0.82162162 0.79144385]
|
|
|
|
mean value: 0.8001198927231687
|
|
|
|
MCC on Blind test: 0.5
|
|
|
|
Accuracy on Blind test: 0.75
|
|
|
|
Model_name: Ridge ClassifierCV
|
|
Model func: RidgeClassifierCV(cv=10)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifierCV(cv=10))])
|
|
|
|
key: fit_time
|
|
value: [0.24603772 0.26498508 0.38994241 0.27792549 0.27359962 0.25516629
|
|
0.26835465 0.25485849 0.25434923 0.31525064]
|
|
|
|
mean value: 0.28004696369171145
|
|
|
|
key: score_time
|
|
value: [0.02367854 0.02116728 0.03723645 0.02386928 0.02052045 0.02284169
|
|
0.02115107 0.0218339 0.02415466 0.02840328]
|
|
|
|
mean value: 0.0244856595993042
|
|
|
|
key: test_mcc
|
|
value: [0.38829014 0.36842105 0.51319869 0.35087719 0.36315314 0.62807634
|
|
0.24269006 0.7888597 0.41299552 0.6754386 ]
|
|
|
|
mean value: 0.47320004273762717
|
|
|
|
key: train_mcc
|
|
value: [0.73673831 0.71919495 0.68367203 0.79706271 0.76125468 0.76139205
|
|
0.77946949 0.63613354 0.80895352 0.65982248]
|
|
|
|
mean value: 0.7343693758553534
|
|
|
|
key: test_accuracy
|
|
value: [0.68421053 0.68421053 0.75675676 0.67567568 0.67567568 0.81081081
|
|
0.62162162 0.89189189 0.7027027 0.83783784]
|
|
|
|
mean value: 0.7341394025604552
|
|
|
|
key: train_accuracy
|
|
value: [0.86826347 0.85928144 0.84179104 0.89850746 0.88059701 0.88059701
|
|
0.88955224 0.81791045 0.90447761 0.82985075]
|
|
|
|
mean value: 0.8670828492269193
|
|
|
|
key: test_fscore
|
|
value: [0.72727273 0.68421053 0.74285714 0.66666667 0.7 0.78787879
|
|
0.63157895 0.88888889 0.68571429 0.84210526]
|
|
|
|
mean value: 0.7357173236120604
|
|
|
|
key: train_fscore
|
|
value: [0.86982249 0.86217009 0.84365782 0.89940828 0.88023952 0.88235294
|
|
0.89085546 0.820059 0.90419162 0.83086053]
|
|
|
|
mean value: 0.868361774161939
|
|
|
|
key: test_precision
|
|
value: [0.64 0.68421053 0.76470588 0.66666667 0.63636364 0.86666667
|
|
0.63157895 0.94117647 0.75 0.84210526]
|
|
|
|
mean value: 0.7423474059480252
|
|
|
|
key: train_precision
|
|
value: [0.85964912 0.84482759 0.83625731 0.89411765 0.88554217 0.87209302
|
|
0.87790698 0.80813953 0.90419162 0.82352941]
|
|
|
|
mean value: 0.8606254398103851
|
|
|
|
key: test_recall
|
|
value: [0.84210526 0.68421053 0.72222222 0.66666667 0.77777778 0.72222222
|
|
0.63157895 0.84210526 0.63157895 0.84210526]
|
|
|
|
mean value: 0.7362573099415204
|
|
|
|
key: train_recall
|
|
value: [0.88023952 0.88023952 0.85119048 0.9047619 0.875 0.89285714
|
|
0.90419162 0.83233533 0.90419162 0.83832335]
|
|
|
|
mean value: 0.8763330481893357
|
|
|
|
key: test_roc_auc
|
|
value: [0.68421053 0.68421053 0.75584795 0.6754386 0.67836257 0.80847953
|
|
0.62134503 0.89327485 0.70467836 0.8377193 ]
|
|
|
|
mean value: 0.7343567251461989
|
|
|
|
key: train_roc_auc
|
|
value: [0.86826347 0.85928144 0.8417629 0.89848874 0.88061377 0.88056031
|
|
0.88959581 0.81795338 0.90447676 0.82987596]
|
|
|
|
mean value: 0.867087254063302
|
|
|
|
key: test_jcc
|
|
value: [0.57142857 0.52 0.59090909 0.5 0.53846154 0.65
|
|
0.46153846 0.8 0.52173913 0.72727273]
|
|
|
|
mean value: 0.5881349520045173
|
|
|
|
key: train_jcc
|
|
value: [0.76963351 0.75773196 0.72959184 0.8172043 0.78609626 0.78947368
|
|
0.80319149 0.695 0.82513661 0.7106599 ]
|
|
|
|
mean value: 0.7683719545181988
|
|
|
|
MCC on Blind test: 0.5
|
|
|
|
Accuracy on Blind test: 0.75
|
|
|
|
Model_name: Logistic Regression
|
|
Model func: LogisticRegression(random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegression(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.03324914 0.04741716 0.0348413 0.03552556 0.03563619 0.03589916
|
|
0.03925538 0.03781486 0.03497291 0.03469563]
|
|
|
|
mean value: 0.0369307279586792
|
|
|
|
key: score_time
|
|
value: [0.01216602 0.01860523 0.01482677 0.01195216 0.01207995 0.01198125
|
|
0.01210189 0.01472735 0.01470685 0.01486182]
|
|
|
|
mean value: 0.013800930976867676
|
|
|
|
key: test_mcc
|
|
value: [0.63960215 0.47633051 0.45906433 0.30384671 0.26327408 0.40469382
|
|
0.4633451 0.78362573 0.40643275 0.56725146]
|
|
|
|
mean value: 0.47674666375601193
|
|
|
|
key: train_mcc
|
|
value: [0.64681096 0.67669524 0.67764589 0.68972407 0.71347093 0.67218529
|
|
0.67766887 0.6537872 0.65393941 0.65393941]
|
|
|
|
mean value: 0.671586726844115
|
|
|
|
key: test_accuracy
|
|
value: [0.81578947 0.73684211 0.72972973 0.64864865 0.62162162 0.7027027
|
|
0.72972973 0.89189189 0.7027027 0.78378378]
|
|
|
|
mean value: 0.7363442389758179
|
|
|
|
key: train_accuracy
|
|
value: [0.82335329 0.83832335 0.83880597 0.84477612 0.85671642 0.8358209
|
|
0.83880597 0.82686567 0.82686567 0.82686567]
|
|
|
|
mean value: 0.8357199034766288
|
|
|
|
key: test_fscore
|
|
value: [0.82926829 0.72222222 0.72222222 0.66666667 0.66666667 0.68571429
|
|
0.72222222 0.89473684 0.7027027 0.78947368]
|
|
|
|
mean value: 0.7401895807415705
|
|
|
|
key: train_fscore
|
|
value: [0.82492582 0.83928571 0.84023669 0.84705882 0.85798817 0.83965015
|
|
0.83928571 0.82738095 0.82840237 0.82840237]
|
|
|
|
mean value: 0.8372616752076943
|
|
|
|
key: test_precision
|
|
value: [0.77272727 0.76470588 0.72222222 0.61904762 0.58333333 0.70588235
|
|
0.76470588 0.89473684 0.72222222 0.78947368]
|
|
|
|
mean value: 0.7339057313515518
|
|
|
|
key: train_precision
|
|
value: [0.81764706 0.83431953 0.83529412 0.8372093 0.85294118 0.82285714
|
|
0.83431953 0.82248521 0.81871345 0.81871345]
|
|
|
|
mean value: 0.8294499959063726
|
|
|
|
key: test_recall
|
|
value: [0.89473684 0.68421053 0.72222222 0.72222222 0.77777778 0.66666667
|
|
0.68421053 0.89473684 0.68421053 0.78947368]
|
|
|
|
mean value: 0.752046783625731
|
|
|
|
key: train_recall
|
|
value: [0.83233533 0.84431138 0.8452381 0.85714286 0.86309524 0.85714286
|
|
0.84431138 0.83233533 0.83832335 0.83832335]
|
|
|
|
mean value: 0.8452559167379526
|
|
|
|
key: test_roc_auc
|
|
value: [0.81578947 0.73684211 0.72953216 0.6505848 0.62573099 0.70175439
|
|
0.73099415 0.89181287 0.70321637 0.78362573]
|
|
|
|
mean value: 0.7369883040935673
|
|
|
|
key: train_roc_auc
|
|
value: [0.82335329 0.83832335 0.83878671 0.84473909 0.85669732 0.83575706
|
|
0.83882236 0.82688195 0.82689977 0.82689977]
|
|
|
|
mean value: 0.8357160678642714
|
|
|
|
key: test_jcc
|
|
value: [0.70833333 0.56521739 0.56521739 0.5 0.5 0.52173913
|
|
0.56521739 0.80952381 0.54166667 0.65217391]
|
|
|
|
mean value: 0.5929089026915114
|
|
|
|
key: train_jcc
|
|
value: [0.7020202 0.72307692 0.7244898 0.73469388 0.75129534 0.72361809
|
|
0.72307692 0.70558376 0.70707071 0.70707071]
|
|
|
|
mean value: 0.7201996319369854
|
|
|
|
MCC on Blind test: 0.55
|
|
|
|
Accuracy on Blind test: 0.78
|
|
|
|
Model_name: Logistic RegressionCV
|
|
Model func: LogisticRegressionCV(random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegressionCV(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.7503593 0.94796991 0.8130517 0.94401598 1.19774771 0.95386386
|
|
1.11308455 1.17750955 1.02763748 1.08334398]
|
|
|
|
mean value: 1.0008584022521974
|
|
|
|
key: score_time
|
|
value: [0.01199889 0.01201534 0.012115 0.0126338 0.01203322 0.01246309
|
|
0.0122385 0.012321 0.01212645 0.01198578]
|
|
|
|
mean value: 0.012193107604980468
|
|
|
|
key: test_mcc
|
|
value: [0.76376262 0.42640143 0.51793973 0.35484024 0.25301653 0.41299552
|
|
0.51319869 0.7888597 0.45906433 0.62170355]
|
|
|
|
mean value: 0.5111782335846349
|
|
|
|
key: train_mcc
|
|
value: [0.54506656 0.65386793 0.60080046 0.60022832 0.71343028 0.60631371
|
|
0.62418887 0.61866736 0.61288416 0.59402623]
|
|
|
|
mean value: 0.6169473882387252
|
|
|
|
key: test_accuracy
|
|
value: [0.86842105 0.71052632 0.75675676 0.67567568 0.62162162 0.7027027
|
|
0.75675676 0.89189189 0.72972973 0.81081081]
|
|
|
|
mean value: 0.7524893314366998
|
|
|
|
key: train_accuracy
|
|
value: [0.77245509 0.82634731 0.8 0.8 0.85671642 0.80298507
|
|
0.8119403 0.80895522 0.80597015 0.79701493]
|
|
|
|
mean value: 0.808238448476182
|
|
|
|
key: test_fscore
|
|
value: [0.88372093 0.68571429 0.72727273 0.68421053 0.65 0.71794872
|
|
0.76923077 0.88888889 0.73684211 0.82051282]
|
|
|
|
mean value: 0.7564341771379715
|
|
|
|
key: train_fscore
|
|
value: [0.77514793 0.83139535 0.8057971 0.80351906 0.85714286 0.80701754
|
|
0.81415929 0.8128655 0.81049563 0.79640719]
|
|
|
|
mean value: 0.8113947443428973
|
|
|
|
key: test_precision
|
|
value: [0.79166667 0.75 0.8 0.65 0.59090909 0.66666667
|
|
0.75 0.94117647 0.73684211 0.8 ]
|
|
|
|
mean value: 0.7477261000093818
|
|
|
|
key: train_precision
|
|
value: [0.76608187 0.8079096 0.78531073 0.79190751 0.85714286 0.79310345
|
|
0.80232558 0.79428571 0.78977273 0.79640719]
|
|
|
|
mean value: 0.7984247238780199
|
|
|
|
key: test_recall
|
|
value: [1. 0.63157895 0.66666667 0.72222222 0.72222222 0.77777778
|
|
0.78947368 0.84210526 0.73684211 0.84210526]
|
|
|
|
mean value: 0.7730994152046784
|
|
|
|
key: train_recall
|
|
value: [0.78443114 0.85628743 0.82738095 0.81547619 0.85714286 0.82142857
|
|
0.82634731 0.83233533 0.83233533 0.79640719]
|
|
|
|
mean value: 0.8249572284003421
|
|
|
|
key: test_roc_auc
|
|
value: [0.86842105 0.71052632 0.75438596 0.67690058 0.62426901 0.70467836
|
|
0.75584795 0.89327485 0.72953216 0.80994152]
|
|
|
|
mean value: 0.7527777777777778
|
|
|
|
key: train_roc_auc
|
|
value: [0.77245509 0.82634731 0.79991802 0.79995366 0.85671514 0.80292985
|
|
0.81198318 0.80902481 0.80604862 0.79701312]
|
|
|
|
mean value: 0.808238879384089
|
|
|
|
key: test_jcc
|
|
value: [0.79166667 0.52173913 0.57142857 0.52 0.48148148 0.56
|
|
0.625 0.8 0.58333333 0.69565217]
|
|
|
|
mean value: 0.6150301357257879
|
|
|
|
key: train_jcc
|
|
value: [0.63285024 0.71144279 0.67475728 0.67156863 0.75 0.67647059
|
|
0.68656716 0.68472906 0.68137255 0.66169154]
|
|
|
|
mean value: 0.6831449844381896
|
|
|
|
MCC on Blind test: 0.55
|
|
|
|
Accuracy on Blind test: 0.78
|
|
|
|
Model_name: Gaussian NB
|
|
Model func: GaussianNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01529908 0.01295209 0.01059198 0.01507759 0.01370621 0.01037312
|
|
0.0103159 0.00972629 0.01666164 0.00996923]
|
|
|
|
mean value: 0.012467312812805175
|
|
|
|
key: score_time
|
|
value: [0.01217151 0.01057529 0.00945187 0.01279092 0.00989747 0.00930166
|
|
0.00898147 0.00911093 0.01435661 0.00921655]
|
|
|
|
mean value: 0.010585427284240723
|
|
|
|
key: test_mcc
|
|
value: [0.49923018 0.32732684 0.36315314 0.24850835 0.17372281 0.32780503
|
|
0.35558302 0.51793973 0.13424397 0.46019501]
|
|
|
|
mean value: 0.340770808774687
|
|
|
|
key: train_mcc
|
|
value: [0.36949421 0.38951602 0.39275548 0.44504901 0.42715334 0.37903916
|
|
0.43592042 0.3522313 0.40106796 0.47068416]
|
|
|
|
mean value: 0.4062911058705138
|
|
|
|
key: test_accuracy
|
|
value: [0.73684211 0.65789474 0.67567568 0.59459459 0.56756757 0.64864865
|
|
0.67567568 0.75675676 0.56756757 0.72972973]
|
|
|
|
mean value: 0.6610953058321479
|
|
|
|
key: train_accuracy
|
|
value: [0.67365269 0.68263473 0.68656716 0.71044776 0.70447761 0.66567164
|
|
0.71044776 0.66567164 0.68955224 0.73432836]
|
|
|
|
mean value: 0.6923451604254178
|
|
|
|
key: test_fscore
|
|
value: [0.77272727 0.69767442 0.7 0.68085106 0.65217391 0.69767442
|
|
0.71428571 0.7804878 0.63636364 0.75 ]
|
|
|
|
mean value: 0.708223824233724
|
|
|
|
key: train_fscore
|
|
value: [0.72122762 0.72959184 0.73007712 0.75191816 0.74285714 0.73205742
|
|
0.74270557 0.71282051 0.73195876 0.74498567]
|
|
|
|
mean value: 0.7340199816084876
|
|
|
|
key: test_precision
|
|
value: [0.68 0.625 0.63636364 0.55172414 0.53571429 0.6
|
|
0.65217391 0.72727273 0.56 0.71428571]
|
|
|
|
mean value: 0.6282534414610876
|
|
|
|
key: train_precision
|
|
value: [0.62946429 0.63555556 0.64253394 0.65919283 0.65898618 0.612
|
|
0.66666667 0.62331839 0.64253394 0.71428571]
|
|
|
|
mean value: 0.6484537481402929
|
|
|
|
key: test_recall
|
|
value: [0.89473684 0.78947368 0.77777778 0.88888889 0.83333333 0.83333333
|
|
0.78947368 0.84210526 0.73684211 0.78947368]
|
|
|
|
mean value: 0.8175438596491228
|
|
|
|
key: train_recall
|
|
value: [0.84431138 0.85628743 0.8452381 0.875 0.85119048 0.91071429
|
|
0.83832335 0.83233533 0.8502994 0.77844311]
|
|
|
|
mean value: 0.8482142857142857
|
|
|
|
key: test_roc_auc
|
|
value: [0.73684211 0.65789474 0.67836257 0.60233918 0.5745614 0.65350877
|
|
0.67251462 0.75438596 0.5628655 0.72807018]
|
|
|
|
mean value: 0.6621345029239767
|
|
|
|
key: train_roc_auc
|
|
value: [0.67365269 0.68263473 0.6860921 0.70995509 0.70403835 0.66493798
|
|
0.71082834 0.66616766 0.69003065 0.73445965]
|
|
|
|
mean value: 0.6922797262617622
|
|
|
|
key: test_jcc
|
|
value: [0.62962963 0.53571429 0.53846154 0.51612903 0.48387097 0.53571429
|
|
0.55555556 0.64 0.46666667 0.6 ]
|
|
|
|
mean value: 0.5501741961741962
|
|
|
|
key: train_jcc
|
|
value: [0.564 0.57429719 0.57489879 0.60245902 0.59090909 0.57735849
|
|
0.5907173 0.55378486 0.57723577 0.59360731]
|
|
|
|
mean value: 0.5799267810478317
|
|
|
|
MCC on Blind test: 0.41
|
|
|
|
Accuracy on Blind test: 0.71
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.00978756 0.01674604 0.01243091 0.01110196 0.00982499 0.00983
|
|
0.01759958 0.01243544 0.01617932 0.01437855]
|
|
|
|
mean value: 0.013031435012817384
|
|
|
|
key: score_time
|
|
value: [0.00951767 0.01448154 0.00909162 0.00982094 0.00880694 0.00881338
|
|
0.01499009 0.01310825 0.0096364 0.00964451]
|
|
|
|
mean value: 0.010791134834289551
|
|
|
|
key: test_mcc
|
|
value: [0.65465367 0.31980107 0.29766651 0.36315314 0.04156687 0.35087719
|
|
0.40643275 0.78362573 0.35104619 0.40469382]
|
|
|
|
mean value: 0.3973516955221179
|
|
|
|
key: train_mcc
|
|
value: [0.4794633 0.49930288 0.50588503 0.50465397 0.51786435 0.4881434
|
|
0.48107736 0.41652286 0.50452764 0.48216643]
|
|
|
|
mean value: 0.4879607234355706
|
|
|
|
key: test_accuracy
|
|
value: [0.81578947 0.65789474 0.64864865 0.67567568 0.51351351 0.67567568
|
|
0.7027027 0.89189189 0.67567568 0.7027027 ]
|
|
|
|
mean value: 0.6960170697012802
|
|
|
|
key: train_accuracy
|
|
value: [0.73952096 0.74850299 0.75223881 0.75223881 0.75820896 0.74328358
|
|
0.74029851 0.70746269 0.75223881 0.74029851]
|
|
|
|
mean value: 0.7434292608812226
|
|
|
|
key: test_fscore
|
|
value: [0.8372093 0.62857143 0.60606061 0.7 0.59090909 0.66666667
|
|
0.7027027 0.89473684 0.7 0.71794872]
|
|
|
|
mean value: 0.7044805357290057
|
|
|
|
key: train_fscore
|
|
value: [0.74486804 0.76 0.76217765 0.75659824 0.76790831 0.75428571
|
|
0.74486804 0.7183908 0.74924471 0.74927954]
|
|
|
|
mean value: 0.7507621041515078
|
|
|
|
key: test_precision
|
|
value: [0.75 0.6875 0.66666667 0.63636364 0.5 0.66666667
|
|
0.72222222 0.89473684 0.66666667 0.7 ]
|
|
|
|
mean value: 0.6890822700691122
|
|
|
|
key: train_precision
|
|
value: [0.72988506 0.72677596 0.73480663 0.74566474 0.74033149 0.72527473
|
|
0.72988506 0.69060773 0.75609756 0.72222222]
|
|
|
|
mean value: 0.7301551175937223
|
|
|
|
key: test_recall
|
|
value: [0.94736842 0.57894737 0.55555556 0.77777778 0.72222222 0.66666667
|
|
0.68421053 0.89473684 0.73684211 0.73684211]
|
|
|
|
mean value: 0.7301169590643275
|
|
|
|
key: train_recall
|
|
value: [0.76047904 0.79640719 0.79166667 0.76785714 0.79761905 0.78571429
|
|
0.76047904 0.74850299 0.74251497 0.77844311]
|
|
|
|
mean value: 0.7729683490162532
|
|
|
|
key: test_roc_auc
|
|
value: [0.81578947 0.65789474 0.64619883 0.67836257 0.51900585 0.6754386
|
|
0.70321637 0.89181287 0.67397661 0.70175439]
|
|
|
|
mean value: 0.6963450292397662
|
|
|
|
key: train_roc_auc
|
|
value: [0.73952096 0.74850299 0.75212076 0.75219204 0.75809096 0.74315654
|
|
0.74035857 0.70758483 0.75220987 0.74041203]
|
|
|
|
mean value: 0.7434149558026804
|
|
|
|
key: test_jcc
|
|
value: [0.72 0.45833333 0.43478261 0.53846154 0.41935484 0.5
|
|
0.54166667 0.80952381 0.53846154 0.56 ]
|
|
|
|
mean value: 0.5520584333852216
|
|
|
|
key: train_jcc
|
|
value: [0.59345794 0.61290323 0.61574074 0.60849057 0.62325581 0.60550459
|
|
0.59345794 0.56053812 0.59903382 0.59907834]
|
|
|
|
mean value: 0.6011461095575721
|
|
|
|
MCC on Blind test: 0.4
|
|
|
|
Accuracy on Blind test: 0.71
|
|
|
|
Model_name: K-Nearest Neighbors
|
|
Model func: KNeighborsClassifier()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', KNeighborsClassifier())])
|
|
|
|
key: fit_time
|
|
value: [0.01599813 0.01046515 0.01014614 0.00947165 0.01046848 0.01025653
|
|
0.01029468 0.01036191 0.01017714 0.01016212]
|
|
|
|
mean value: 0.01078019142150879
|
|
|
|
key: score_time
|
|
value: [0.01929283 0.01325297 0.01263189 0.01218247 0.01183391 0.01208878
|
|
0.01733851 0.01667285 0.01754093 0.0118258 ]
|
|
|
|
mean value: 0.014466094970703124
|
|
|
|
key: test_mcc
|
|
value: [ 0.16151457 0.26315789 0.02932564 0.19005848 -0.13274856 0.4163404
|
|
-0.07653316 0.4633451 0.29824561 0.24269006]
|
|
|
|
mean value: 0.18553960372885517
|
|
|
|
key: train_mcc
|
|
value: [0.55692617 0.50372605 0.52464806 0.51641419 0.55496659 0.51708324
|
|
0.54666389 0.49948299 0.54695332 0.50474224]
|
|
|
|
mean value: 0.5271606737139082
|
|
|
|
key: test_accuracy
|
|
value: [0.57894737 0.63157895 0.51351351 0.59459459 0.43243243 0.7027027
|
|
0.45945946 0.72972973 0.64864865 0.62162162]
|
|
|
|
mean value: 0.5913229018492177
|
|
|
|
key: train_accuracy
|
|
value: [0.77844311 0.75149701 0.76119403 0.75820896 0.7761194 0.75820896
|
|
0.77313433 0.74925373 0.77313433 0.75223881]
|
|
|
|
mean value: 0.7631432657073912
|
|
|
|
key: test_fscore
|
|
value: [0.61904762 0.63157895 0.52631579 0.59459459 0.46153846 0.64516129
|
|
0.375 0.72222222 0.64864865 0.63157895]
|
|
|
|
mean value: 0.5855686520584653
|
|
|
|
key: train_fscore
|
|
value: [0.77710843 0.75801749 0.77272727 0.75964392 0.78753541 0.76521739
|
|
0.77647059 0.75581395 0.77777778 0.75516224]
|
|
|
|
mean value: 0.7685474479546099
|
|
|
|
key: test_precision
|
|
value: [0.56521739 0.63157895 0.5 0.57894737 0.42857143 0.76923077
|
|
0.46153846 0.76470588 0.66666667 0.63157895]
|
|
|
|
mean value: 0.599803586282251
|
|
|
|
key: train_precision
|
|
value: [0.78181818 0.73863636 0.73913043 0.75739645 0.75135135 0.74576271
|
|
0.76300578 0.73446328 0.76 0.74418605]
|
|
|
|
mean value: 0.7515750596851661
|
|
|
|
key: test_recall
|
|
value: [0.68421053 0.63157895 0.55555556 0.61111111 0.5 0.55555556
|
|
0.31578947 0.68421053 0.63157895 0.63157895]
|
|
|
|
mean value: 0.5801169590643275
|
|
|
|
key: train_recall
|
|
value: [0.77245509 0.77844311 0.80952381 0.76190476 0.82738095 0.78571429
|
|
0.79041916 0.77844311 0.79640719 0.76646707]
|
|
|
|
mean value: 0.7867158540062732
|
|
|
|
key: test_roc_auc
|
|
value: [0.57894737 0.63157895 0.51461988 0.59502924 0.43421053 0.69883041
|
|
0.46345029 0.73099415 0.64912281 0.62134503]
|
|
|
|
mean value: 0.591812865497076
|
|
|
|
key: train_roc_auc
|
|
value: [0.77844311 0.75149701 0.76104933 0.75819789 0.77596593 0.7581266
|
|
0.77318577 0.7493406 0.77320359 0.75228115]
|
|
|
|
mean value: 0.7631290989449673
|
|
|
|
key: test_jcc
|
|
value: [0.44827586 0.46153846 0.35714286 0.42307692 0.3 0.47619048
|
|
0.23076923 0.56521739 0.48 0.46153846]
|
|
|
|
mean value: 0.42037496636297234
|
|
|
|
key: train_jcc
|
|
value: [0.63546798 0.61032864 0.62962963 0.61244019 0.64953271 0.61971831
|
|
0.63461538 0.60747664 0.63636364 0.60663507]
|
|
|
|
mean value: 0.6242208187533025
|
|
|
|
MCC on Blind test: 0.23
|
|
|
|
Accuracy on Blind test: 0.61
|
|
|
|
Model_name: SVM
|
|
Model func: SVC(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SVC(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02017927 0.01677966 0.01893783 0.01648068 0.01850128 0.0192368
|
|
0.01685691 0.01733661 0.01722908 0.01997876]
|
|
|
|
mean value: 0.018151688575744628
|
|
|
|
key: score_time
|
|
value: [0.01204991 0.0105381 0.01109457 0.01102734 0.01069355 0.01100254
|
|
0.01266527 0.01212406 0.01152778 0.01219964]
|
|
|
|
mean value: 0.011492276191711425
|
|
|
|
key: test_mcc
|
|
value: [0.65465367 0.37047929 0.45906433 0.41299552 0.26327408 0.45906433
|
|
0.35484024 0.78362573 0.35104619 0.6754386 ]
|
|
|
|
mean value: 0.47844819716987913
|
|
|
|
key: train_mcc
|
|
value: [0.68867214 0.74359559 0.71347093 0.70190853 0.73134914 0.67800982
|
|
0.71953089 0.7074779 0.70233373 0.67766887]
|
|
|
|
mean value: 0.7064017543848087
|
|
|
|
key: test_accuracy
|
|
value: [0.81578947 0.68421053 0.72972973 0.7027027 0.62162162 0.72972973
|
|
0.67567568 0.89189189 0.67567568 0.83783784]
|
|
|
|
mean value: 0.7364864864864865
|
|
|
|
key: train_accuracy
|
|
value: [0.84431138 0.87125749 0.85671642 0.85074627 0.86567164 0.83880597
|
|
0.85970149 0.85373134 0.85074627 0.83880597]
|
|
|
|
mean value: 0.8530494235409778
|
|
|
|
key: test_fscore
|
|
value: [0.8372093 0.66666667 0.72222222 0.71794872 0.66666667 0.72222222
|
|
0.66666667 0.89473684 0.7 0.84210526]
|
|
|
|
mean value: 0.7436444569981902
|
|
|
|
key: train_fscore
|
|
value: [0.8452381 0.87463557 0.85798817 0.85380117 0.86646884 0.84210526
|
|
0.86053412 0.85373134 0.85380117 0.83928571]
|
|
|
|
mean value: 0.8547589456699216
|
|
|
|
key: test_precision
|
|
value: [0.75 0.70588235 0.72222222 0.66666667 0.58333333 0.72222222
|
|
0.70588235 0.89473684 0.66666667 0.84210526]
|
|
|
|
mean value: 0.7259717922256622
|
|
|
|
key: train_precision
|
|
value: [0.84023669 0.85227273 0.85294118 0.83908046 0.86390533 0.82758621
|
|
0.85294118 0.85119048 0.83428571 0.83431953]
|
|
|
|
mean value: 0.8448759475818299
|
|
|
|
key: test_recall
|
|
value: [0.94736842 0.63157895 0.72222222 0.77777778 0.77777778 0.72222222
|
|
0.63157895 0.89473684 0.73684211 0.84210526]
|
|
|
|
mean value: 0.7684210526315789
|
|
|
|
key: train_recall
|
|
value: [0.8502994 0.89820359 0.86309524 0.86904762 0.86904762 0.85714286
|
|
0.86826347 0.85628743 0.8742515 0.84431138]
|
|
|
|
mean value: 0.8649950099800399
|
|
|
|
key: test_roc_auc
|
|
value: [0.81578947 0.68421053 0.72953216 0.70467836 0.62573099 0.72953216
|
|
0.67690058 0.89181287 0.67397661 0.8377193 ]
|
|
|
|
mean value: 0.7369883040935673
|
|
|
|
key: train_roc_auc
|
|
value: [0.84431138 0.87125749 0.85669732 0.85069147 0.86566153 0.83875107
|
|
0.85972697 0.85373895 0.85081622 0.83882236]
|
|
|
|
mean value: 0.8530474764756202
|
|
|
|
key: test_jcc
|
|
value: [0.72 0.5 0.56521739 0.56 0.5 0.56521739
|
|
0.5 0.80952381 0.53846154 0.72727273]
|
|
|
|
mean value: 0.5985692857866771
|
|
|
|
key: train_jcc
|
|
value: [0.73195876 0.77720207 0.75129534 0.74489796 0.76439791 0.72727273
|
|
0.75520833 0.74479167 0.74489796 0.72307692]
|
|
|
|
mean value: 0.7464999646689182
|
|
|
|
MCC on Blind test: 0.35
|
|
|
|
Accuracy on Blind test: 0.68
|
|
|
|
Model_name: MLP
|
|
Model func: MLPClassifier(max_iter=500, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MLPClassifier(max_iter=500, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.3681643 1.31118202 1.42917252 1.24520016 1.43492532 1.45483589
|
|
1.31954503 1.37193584 1.29898882 1.43966317]
|
|
|
|
mean value: 1.367361307144165
|
|
|
|
key: score_time
|
|
value: [0.02325869 0.01489902 0.01540542 0.01515889 0.02326679 0.01336527
|
|
0.01248026 0.01519704 0.0153687 0.02362657]
|
|
|
|
mean value: 0.01720266342163086
|
|
|
|
key: test_mcc
|
|
value: [0.52704628 0.31980107 0.57184997 0.4633451 0.39648395 0.48078072
|
|
0.42489158 0.56934383 0.51461988 0.62280702]
|
|
|
|
mean value: 0.4890969396644261
|
|
|
|
key: train_mcc
|
|
value: [0.98205353 0.98802395 0.98812962 0.9880596 0.98224601 0.96424625
|
|
0.99404762 0.99404762 0.96480168 0.98813046]
|
|
|
|
mean value: 0.9833786340272181
|
|
|
|
key: test_accuracy
|
|
value: [0.76315789 0.65789474 0.78378378 0.72972973 0.67567568 0.72972973
|
|
0.7027027 0.78378378 0.75675676 0.81081081]
|
|
|
|
mean value: 0.739402560455192
|
|
|
|
key: train_accuracy
|
|
value: [0.99101796 0.99401198 0.99402985 0.99402985 0.99104478 0.98208955
|
|
0.99701493 0.99701493 0.98208955 0.99402985]
|
|
|
|
mean value: 0.991637322370185
|
|
|
|
key: test_fscore
|
|
value: [0.76923077 0.62857143 0.78947368 0.73684211 0.72727273 0.66666667
|
|
0.66666667 0.8 0.75675676 0.81081081]
|
|
|
|
mean value: 0.735229161544951
|
|
|
|
key: train_fscore
|
|
value: [0.99104478 0.99401198 0.99408284 0.99404762 0.99115044 0.98224852
|
|
0.99701493 0.99701493 0.98235294 0.99404762]
|
|
|
|
mean value: 0.9917016585609906
|
|
|
|
key: test_precision
|
|
value: [0.75 0.6875 0.75 0.7 0.61538462 0.83333333
|
|
0.78571429 0.76190476 0.77777778 0.83333333]
|
|
|
|
mean value: 0.7494948107448107
|
|
|
|
key: train_precision
|
|
value: [0.98809524 0.99401198 0.98823529 0.99404762 0.98245614 0.97647059
|
|
0.99404762 0.99404762 0.96531792 0.98816568]
|
|
|
|
mean value: 0.9864895693538335
|
|
|
|
key: test_recall
|
|
value: [0.78947368 0.57894737 0.83333333 0.77777778 0.88888889 0.55555556
|
|
0.57894737 0.84210526 0.73684211 0.78947368]
|
|
|
|
mean value: 0.7371345029239766
|
|
|
|
key: train_recall
|
|
value: [0.99401198 0.99401198 1. 0.99404762 1. 0.98809524
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9970166809238665
|
|
|
|
key: test_roc_auc
|
|
value: [0.76315789 0.65789474 0.78508772 0.73099415 0.68128655 0.7251462
|
|
0.70614035 0.78216374 0.75730994 0.81140351]
|
|
|
|
mean value: 0.7400584795321637
|
|
|
|
key: train_roc_auc
|
|
value: [0.99101796 0.99401198 0.99401198 0.9940298 0.99101796 0.98207157
|
|
0.99702381 0.99702381 0.98214286 0.99404762]
|
|
|
|
mean value: 0.9916399344168806
|
|
|
|
key: test_jcc
|
|
value: [0.625 0.45833333 0.65217391 0.58333333 0.57142857 0.5
|
|
0.5 0.66666667 0.60869565 0.68181818]
|
|
|
|
mean value: 0.5847449651797478
|
|
|
|
key: train_jcc
|
|
value: [0.98224852 0.98809524 0.98823529 0.98816568 0.98245614 0.96511628
|
|
0.99404762 0.99404762 0.96531792 0.98816568]
|
|
|
|
mean value: 0.9835895990460717
|
|
|
|
MCC on Blind test: 0.5
|
|
|
|
Accuracy on Blind test: 0.75
|
|
|
|
Model_name: Decision Tree
|
|
Model func: DecisionTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', DecisionTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02768469 0.02401519 0.02204943 0.03315425 0.02625251 0.02455163
|
|
0.02659726 0.02823281 0.02084541 0.02443576]
|
|
|
|
mean value: 0.025781893730163576
|
|
|
|
key: score_time
|
|
value: [0.01190305 0.00906563 0.00879693 0.01450753 0.00914645 0.00912809
|
|
0.01328111 0.01062655 0.00963855 0.00995922]
|
|
|
|
mean value: 0.010605311393737793
|
|
|
|
key: test_mcc
|
|
value: [0.58218174 0.58218174 0.51461988 0.40643275 0.29824561 0.62170355
|
|
0.31339521 0.67849265 0.40469382 0.62170355]
|
|
|
|
mean value: 0.5023650499200868
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.78947368 0.78947368 0.75675676 0.7027027 0.64864865 0.81081081
|
|
0.64864865 0.83783784 0.7027027 0.81081081]
|
|
|
|
mean value: 0.7497866287339972
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.8 0.77777778 0.75675676 0.7027027 0.64864865 0.8
|
|
0.60606061 0.85 0.71794872 0.82051282]
|
|
|
|
mean value: 0.748040803040803
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.76190476 0.82352941 0.73684211 0.68421053 0.63157895 0.82352941
|
|
0.71428571 0.80952381 0.7 0.8 ]
|
|
|
|
mean value: 0.7485404688191066
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.84210526 0.73684211 0.77777778 0.72222222 0.66666667 0.77777778
|
|
0.52631579 0.89473684 0.73684211 0.84210526]
|
|
|
|
mean value: 0.7523391812865496
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.78947368 0.78947368 0.75730994 0.70321637 0.64912281 0.80994152
|
|
0.65204678 0.83625731 0.70175439 0.80994152]
|
|
|
|
mean value: 0.7498538011695907
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.66666667 0.63636364 0.60869565 0.54166667 0.48 0.66666667
|
|
0.43478261 0.73913043 0.56 0.69565217]
|
|
|
|
mean value: 0.6029624505928853
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.38
|
|
|
|
Accuracy on Blind test: 0.69
|
|
|
|
Model_name: Extra Trees
|
|
Model func: ExtraTreesClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreesClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.13470531 0.13264561 0.1452086 0.13969755 0.13984036 0.11828136
|
|
0.122087 0.11941075 0.12233281 0.12006497]
|
|
|
|
mean value: 0.12942743301391602
|
|
|
|
key: score_time
|
|
value: [0.0178175 0.01871252 0.01925445 0.01819825 0.01904869 0.01905203
|
|
0.02677774 0.01779652 0.02003813 0.01874638]
|
|
|
|
mean value: 0.019544219970703124
|
|
|
|
key: test_mcc
|
|
value: [0.53300179 0.43643578 0.4633451 0.57184997 0.37654316 0.57184997
|
|
0.52214434 0.67849265 0.4633451 0.68035483]
|
|
|
|
mean value: 0.5297362681053938
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.76315789 0.71052632 0.72972973 0.78378378 0.67567568 0.78378378
|
|
0.75675676 0.83783784 0.72972973 0.83783784]
|
|
|
|
mean value: 0.7608819345661451
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.7804878 0.66666667 0.73684211 0.78947368 0.71428571 0.78947368
|
|
0.74285714 0.85 0.72222222 0.83333333]
|
|
|
|
mean value: 0.7625642357927338
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.72727273 0.78571429 0.7 0.75 0.625 0.75
|
|
0.8125 0.80952381 0.76470588 0.88235294]
|
|
|
|
mean value: 0.7607069646040234
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.84210526 0.57894737 0.77777778 0.83333333 0.83333333 0.83333333
|
|
0.68421053 0.89473684 0.68421053 0.78947368]
|
|
|
|
mean value: 0.7751461988304094
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.76315789 0.71052632 0.73099415 0.78508772 0.67982456 0.78508772
|
|
0.75877193 0.83625731 0.73099415 0.83918129]
|
|
|
|
mean value: 0.7619883040935672
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.64 0.5 0.58333333 0.65217391 0.55555556 0.65217391
|
|
0.59090909 0.73913043 0.56521739 0.71428571]
|
|
|
|
mean value: 0.6192779346257608
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.35
|
|
|
|
Accuracy on Blind test: 0.68
|
|
|
|
Model_name: Extra Tree
|
|
Model func: ExtraTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01108074 0.01164365 0.0096972 0.00977159 0.01087046 0.01061988
|
|
0.0106287 0.01075363 0.01068521 0.00981307]
|
|
|
|
mean value: 0.010556411743164063
|
|
|
|
key: score_time
|
|
value: [0.01089883 0.00914264 0.00875044 0.0089798 0.00961828 0.00974083
|
|
0.00962353 0.00962925 0.00901365 0.00938201]
|
|
|
|
mean value: 0.009477925300598145
|
|
|
|
key: test_mcc
|
|
value: [0.21320072 0.42640143 0.74044197 0.35087719 0.02932564 0.35087719
|
|
0.19469789 0.25301653 0.30384671 0.52214434]
|
|
|
|
mean value: 0.3384829618110117
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.60526316 0.71052632 0.86486486 0.67567568 0.51351351 0.67567568
|
|
0.59459459 0.62162162 0.64864865 0.75675676]
|
|
|
|
mean value: 0.6667140825035562
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.57142857 0.68571429 0.87179487 0.66666667 0.52631579 0.66666667
|
|
0.57142857 0.58823529 0.62857143 0.74285714]
|
|
|
|
mean value: 0.6519679288719537
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.625 0.75 0.80952381 0.66666667 0.5 0.66666667
|
|
0.625 0.66666667 0.6875 0.8125 ]
|
|
|
|
mean value: 0.680952380952381
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.52631579 0.63157895 0.94444444 0.66666667 0.55555556 0.66666667
|
|
0.52631579 0.52631579 0.57894737 0.68421053]
|
|
|
|
mean value: 0.6307017543859649
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.60526316 0.71052632 0.86695906 0.6754386 0.51461988 0.6754386
|
|
0.59649123 0.62426901 0.6505848 0.75877193]
|
|
|
|
mean value: 0.6678362573099415
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.4 0.52173913 0.77272727 0.5 0.35714286 0.5
|
|
0.4 0.41666667 0.45833333 0.59090909]
|
|
|
|
mean value: 0.49175183512140036
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.42
|
|
|
|
Accuracy on Blind test: 0.72
|
|
|
|
Model_name: Random Forest
|
|
Model func: RandomForestClassifier(n_estimators=1000, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(n_estimators=1000, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.70662832 1.68128824 1.69021344 1.72488284 1.71076632 1.70421863
|
|
1.68081808 1.59992051 1.65905738 1.84206057]
|
|
|
|
mean value: 1.699985432624817
|
|
|
|
key: score_time
|
|
value: [0.10135674 0.0981214 0.09128618 0.09769154 0.101897 0.12188268
|
|
0.09971976 0.09218359 0.11544991 0.09854841]
|
|
|
|
mean value: 0.10181372165679932
|
|
|
|
key: test_mcc
|
|
value: [0.58218174 0.63960215 0.45906433 0.62280702 0.19469789 0.62170355
|
|
0.37654316 0.7888597 0.45906433 0.63129316]
|
|
|
|
mean value: 0.5375817013064518
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.78947368 0.81578947 0.72972973 0.81081081 0.59459459 0.81081081
|
|
0.67567568 0.89189189 0.72972973 0.81081081]
|
|
|
|
mean value: 0.7659317211948791
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.8 0.8 0.72222222 0.81081081 0.61538462 0.8
|
|
0.625 0.88888889 0.73684211 0.8 ]
|
|
|
|
mean value: 0.7599148642569695
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.76190476 0.875 0.72222222 0.78947368 0.57142857 0.82352941
|
|
0.76923077 0.94117647 0.73684211 0.875 ]
|
|
|
|
mean value: 0.786580799661295
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.84210526 0.73684211 0.72222222 0.83333333 0.66666667 0.77777778
|
|
0.52631579 0.84210526 0.73684211 0.73684211]
|
|
|
|
mean value: 0.7421052631578947
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.78947368 0.81578947 0.72953216 0.81140351 0.59649123 0.80994152
|
|
0.67982456 0.89327485 0.72953216 0.8128655 ]
|
|
|
|
mean value: 0.7668128654970761
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.66666667 0.66666667 0.56521739 0.68181818 0.44444444 0.66666667
|
|
0.45454545 0.8 0.58333333 0.66666667]
|
|
|
|
mean value: 0.6196025472112429
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.59
|
|
|
|
Accuracy on Blind test: 0.8
|
|
|
|
Model_name: Random Forest2
|
|
Model func: RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'Z...05', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10,
|
|
oob_score=True, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.91720247 1.01498485 0.95184493 0.9032371 0.93344951 0.91477418
|
|
0.94696403 0.98821211 0.92723608 0.92064309]
|
|
|
|
mean value: 0.9418548345565796
|
|
|
|
key: score_time
|
|
value: [0.24337864 0.24633884 0.26120234 0.22425628 0.1785531 0.25081897
|
|
0.27004051 0.25512719 0.21345758 0.14258361]
|
|
|
|
mean value: 0.2285757064819336
|
|
|
|
key: test_mcc
|
|
value: [0.63245553 0.58218174 0.45906433 0.57184997 0.30384671 0.56934383
|
|
0.36315314 0.7888597 0.40643275 0.68035483]
|
|
|
|
mean value: 0.5357542517480304
|
|
|
|
key: train_mcc
|
|
value: [0.9044511 0.88700711 0.88065448 0.91647953 0.89852076 0.89278334
|
|
0.89312092 0.89279859 0.89865612 0.88697146]
|
|
|
|
mean value: 0.8951443406649856
|
|
|
|
key: test_accuracy
|
|
value: [0.81578947 0.78947368 0.72972973 0.78378378 0.64864865 0.78378378
|
|
0.67567568 0.89189189 0.7027027 0.83783784]
|
|
|
|
mean value: 0.7659317211948791
|
|
|
|
key: train_accuracy
|
|
value: [0.95209581 0.94311377 0.94029851 0.95820896 0.94925373 0.94626866
|
|
0.94626866 0.94626866 0.94925373 0.94328358]
|
|
|
|
mean value: 0.9474314058450264
|
|
|
|
key: test_fscore
|
|
value: [0.82051282 0.77777778 0.72222222 0.78947368 0.66666667 0.76470588
|
|
0.64705882 0.88888889 0.7027027 0.83333333]
|
|
|
|
mean value: 0.7613342802197292
|
|
|
|
key: train_fscore
|
|
value: [0.95266272 0.94428152 0.9408284 0.95857988 0.9495549 0.94705882
|
|
0.94705882 0.94674556 0.9495549 0.9439528 ]
|
|
|
|
mean value: 0.9480278334677595
|
|
|
|
key: test_precision
|
|
value: [0.8 0.82352941 0.72222222 0.75 0.61904762 0.8125
|
|
0.73333333 0.94117647 0.72222222 0.88235294]
|
|
|
|
mean value: 0.7806384220354808
|
|
|
|
key: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
train_precision
|
|
value: [0.94152047 0.92528736 0.93529412 0.95294118 0.94674556 0.93604651
|
|
0.93063584 0.93567251 0.94117647 0.93023256]
|
|
|
|
mean value: 0.937555257353177
|
|
|
|
key: test_recall
|
|
value: [0.84210526 0.73684211 0.72222222 0.83333333 0.72222222 0.72222222
|
|
0.57894737 0.84210526 0.68421053 0.78947368]
|
|
|
|
mean value: 0.7473684210526316
|
|
|
|
key: train_recall
|
|
value: [0.96407186 0.96407186 0.94642857 0.96428571 0.95238095 0.95833333
|
|
0.96407186 0.95808383 0.95808383 0.95808383]
|
|
|
|
mean value: 0.9587895637296835
|
|
|
|
key: test_roc_auc
|
|
value: [0.81578947 0.78947368 0.72953216 0.78508772 0.6505848 0.78216374
|
|
0.67836257 0.89327485 0.70321637 0.83918129]
|
|
|
|
mean value: 0.7666666666666666
|
|
|
|
key: train_roc_auc
|
|
value: [0.95209581 0.94311377 0.94028015 0.95819076 0.94924437 0.94623253
|
|
0.94632164 0.94630382 0.94928001 0.94332763]
|
|
|
|
mean value: 0.9474390504704876
|
|
|
|
key: test_jcc
|
|
value: [0.69565217 0.63636364 0.56521739 0.65217391 0.5 0.61904762
|
|
0.47826087 0.8 0.54166667 0.71428571]
|
|
|
|
mean value: 0.6202667984189724
|
|
|
|
key: train_jcc
|
|
value: [0.90960452 0.89444444 0.88826816 0.92045455 0.9039548 0.89944134
|
|
0.89944134 0.8988764 0.9039548 0.89385475]
|
|
|
|
mean value: 0.9012295105279335
|
|
|
|
MCC on Blind test: 0.69
|
|
|
|
Accuracy on Blind test: 0.85
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.02634192 0.00982213 0.01019454 0.00987482 0.00994039 0.01071048
|
|
0.00985861 0.00977159 0.01009774 0.01132774]
|
|
|
|
mean value: 0.011793994903564453
|
|
|
|
key: score_time
|
|
value: [0.01047301 0.00883603 0.07131529 0.00893092 0.009161 0.00895071
|
|
0.00883913 0.00876498 0.00909233 0.00948286]
|
|
|
|
mean value: 0.015384626388549805
|
|
|
|
key: test_mcc
|
|
value: [0.65465367 0.31980107 0.29766651 0.36315314 0.04156687 0.35087719
|
|
0.40643275 0.78362573 0.35104619 0.40469382]
|
|
|
|
mean value: 0.3973516955221179
|
|
|
|
key: train_mcc
|
|
value: [0.4794633 0.49930288 0.50588503 0.50465397 0.51786435 0.4881434
|
|
0.48107736 0.41652286 0.50452764 0.48216643]
|
|
|
|
mean value: 0.4879607234355706
|
|
|
|
key: test_accuracy
|
|
value: [0.81578947 0.65789474 0.64864865 0.67567568 0.51351351 0.67567568
|
|
0.7027027 0.89189189 0.67567568 0.7027027 ]
|
|
|
|
mean value: 0.6960170697012802
|
|
|
|
key: train_accuracy
|
|
value: [0.73952096 0.74850299 0.75223881 0.75223881 0.75820896 0.74328358
|
|
0.74029851 0.70746269 0.75223881 0.74029851]
|
|
|
|
mean value: 0.7434292608812226
|
|
|
|
key: test_fscore
|
|
value: [0.8372093 0.62857143 0.60606061 0.7 0.59090909 0.66666667
|
|
0.7027027 0.89473684 0.7 0.71794872]
|
|
|
|
mean value: 0.7044805357290057
|
|
|
|
key: train_fscore
|
|
value: [0.74486804 0.76 0.76217765 0.75659824 0.76790831 0.75428571
|
|
0.74486804 0.7183908 0.74924471 0.74927954]
|
|
|
|
mean value: 0.7507621041515078
|
|
|
|
key: test_precision
|
|
value: [0.75 0.6875 0.66666667 0.63636364 0.5 0.66666667
|
|
0.72222222 0.89473684 0.66666667 0.7 ]
|
|
|
|
mean value: 0.6890822700691122
|
|
|
|
key: train_precision
|
|
value: [0.72988506 0.72677596 0.73480663 0.74566474 0.74033149 0.72527473
|
|
0.72988506 0.69060773 0.75609756 0.72222222]
|
|
|
|
mean value: 0.7301551175937223
|
|
|
|
key: test_recall
|
|
value: [0.94736842 0.57894737 0.55555556 0.77777778 0.72222222 0.66666667
|
|
0.68421053 0.89473684 0.73684211 0.73684211]
|
|
|
|
mean value: 0.7301169590643275
|
|
|
|
key: train_recall
|
|
value: [0.76047904 0.79640719 0.79166667 0.76785714 0.79761905 0.78571429
|
|
0.76047904 0.74850299 0.74251497 0.77844311]
|
|
|
|
mean value: 0.7729683490162532
|
|
|
|
key: test_roc_auc
|
|
value: [0.81578947 0.65789474 0.64619883 0.67836257 0.51900585 0.6754386
|
|
0.70321637 0.89181287 0.67397661 0.70175439]
|
|
|
|
mean value: 0.6963450292397662
|
|
|
|
key: train_roc_auc
|
|
value: [0.73952096 0.74850299 0.75212076 0.75219204 0.75809096 0.74315654
|
|
0.74035857 0.70758483 0.75220987 0.74041203]
|
|
|
|
mean value: 0.7434149558026804
|
|
|
|
key: test_jcc
|
|
value: [0.72 0.45833333 0.43478261 0.53846154 0.41935484 0.5
|
|
0.54166667 0.80952381 0.53846154 0.56 ]
|
|
|
|
mean value: 0.5520584333852216
|
|
|
|
key: train_jcc
|
|
value: [0.59345794 0.61290323 0.61574074 0.60849057 0.62325581 0.60550459
|
|
0.59345794 0.56053812 0.59903382 0.59907834]
|
|
|
|
mean value: 0.6011461095575721
|
|
|
|
MCC on Blind test: 0.4
|
|
|
|
Accuracy on Blind test: 0.71
|
|
|
|
Model_name: XGBoost
|
|
Model func: XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'Z...
|
|
interaction_constraints=None, learning_rate=None,
|
|
max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan,
|
|
monotone_constraints=None, n_estimators=100,
|
|
n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None,
|
|
reg_lambda=None, scale_pos_weight=None,
|
|
subsample=None, tree_method=None,
|
|
use_label_encoder=False,
|
|
validate_parameters=None, verbosity=0))])
|
|
|
|
key: fit_time
|
|
value: [0.27281094 0.10678363 0.18896961 0.29932785 0.0719316 0.08411741
|
|
0.23973918 0.0734241 0.08263278 0.07509923]
|
|
|
|
mean value: 0.14948363304138185
|
|
|
|
key: score_time
|
|
value: [0.01172304 0.01110888 0.01211238 0.01109719 0.0110867 0.011971
|
|
0.01119184 0.01092505 0.01101446 0.01110554]
|
|
|
|
mean value: 0.011333608627319336
|
|
|
|
key: test_mcc
|
|
value: [0.63245553 0.73786479 0.57184997 0.40643275 0.29824561 0.78362573
|
|
0.58342636 0.7888597 0.73020842 0.74044197]
|
|
|
|
mean value: 0.6273410829887742
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.81578947 0.86842105 0.78378378 0.7027027 0.64864865 0.89189189
|
|
0.78378378 0.89189189 0.86486486 0.86486486]
|
|
|
|
mean value: 0.8116642958748223
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.82051282 0.87179487 0.78947368 0.7027027 0.64864865 0.88888889
|
|
0.76470588 0.88888889 0.87179487 0.85714286]
|
|
|
|
mean value: 0.8104554116938018
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.8 0.85 0.75 0.68421053 0.63157895 0.88888889
|
|
0.86666667 0.94117647 0.85 0.9375 ]
|
|
|
|
mean value: 0.8200021499828001
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.84210526 0.89473684 0.83333333 0.72222222 0.66666667 0.88888889
|
|
0.68421053 0.84210526 0.89473684 0.78947368]
|
|
|
|
mean value: 0.8058479532163743
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.81578947 0.86842105 0.78508772 0.70321637 0.64912281 0.89181287
|
|
0.78654971 0.89327485 0.86403509 0.86695906]
|
|
|
|
mean value: 0.8124269005847954
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.69565217 0.77272727 0.65217391 0.54166667 0.48 0.8
|
|
0.61904762 0.8 0.77272727 0.75 ]
|
|
|
|
mean value: 0.6883994918125353
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.79
|
|
|
|
Accuracy on Blind test: 0.89
|
|
|
|
Model_name: LDA
|
|
Model func: LinearDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LinearDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.02827716 0.03424811 0.03768492 0.06055117 0.06715417 0.04611444
|
|
0.06580687 0.07130408 0.06847095 0.06775451]
|
|
|
|
mean value: 0.05473663806915283
|
|
|
|
key: score_time
|
|
value: [0.01207066 0.01202059 0.02242541 0.02408552 0.01220036 0.02165437
|
|
0.02361369 0.02165532 0.02262092 0.02089739]
|
|
|
|
mean value: 0.019324421882629395
|
|
|
|
key: test_mcc
|
|
value: [0.36842105 0.21081851 0.4633451 0.13450292 0.44331728 0.52960948
|
|
0.29824561 0.62280702 0.36315314 0.62170355]
|
|
|
|
mean value: 0.4055923683655079
|
|
|
|
key: train_mcc
|
|
value: [0.82041812 0.81473651 0.83882236 0.81527061 0.83881661 0.78531196
|
|
0.82700603 0.78531196 0.81504458 0.7792393 ]
|
|
|
|
mean value: 0.8119978040512705
|
|
|
|
key: test_accuracy
|
|
value: [0.68421053 0.60526316 0.72972973 0.56756757 0.7027027 0.75675676
|
|
0.64864865 0.81081081 0.67567568 0.81081081]
|
|
|
|
mean value: 0.6992176386913229
|
|
|
|
key: train_accuracy
|
|
value: [0.91017964 0.90718563 0.91940299 0.90746269 0.91940299 0.89253731
|
|
0.91343284 0.89253731 0.90746269 0.88955224]
|
|
|
|
mean value: 0.9059156314237197
|
|
|
|
key: test_fscore
|
|
value: [0.68421053 0.59459459 0.73684211 0.55555556 0.74418605 0.70967742
|
|
0.64864865 0.81081081 0.64705882 0.82051282]
|
|
|
|
mean value: 0.6952097351097256
|
|
|
|
key: train_fscore
|
|
value: [0.91071429 0.90855457 0.91940299 0.90909091 0.91988131 0.89156627
|
|
0.91394659 0.89349112 0.90634441 0.89020772]
|
|
|
|
mean value: 0.9063200160656543
|
|
|
|
key: test_precision
|
|
value: [0.68421053 0.61111111 0.7 0.55555556 0.64 0.84615385
|
|
0.66666667 0.83333333 0.73333333 0.8 ]
|
|
|
|
mean value: 0.7070364372469635
|
|
|
|
key: train_precision
|
|
value: [0.90532544 0.89534884 0.92215569 0.89595376 0.91715976 0.90243902
|
|
0.90588235 0.88304094 0.91463415 0.88235294]
|
|
|
|
mean value: 0.9024292890679951
|
|
|
|
key: test_recall
|
|
value: [0.68421053 0.57894737 0.77777778 0.55555556 0.88888889 0.61111111
|
|
0.63157895 0.78947368 0.57894737 0.84210526]
|
|
|
|
mean value: 0.6938596491228071
|
|
|
|
key: train_recall
|
|
value: [0.91616766 0.92215569 0.91666667 0.92261905 0.92261905 0.88095238
|
|
0.92215569 0.90419162 0.89820359 0.89820359]
|
|
|
|
mean value: 0.910393498716852
|
|
|
|
key: test_roc_auc
|
|
value: [0.68421053 0.60526316 0.73099415 0.56725146 0.70760234 0.75292398
|
|
0.64912281 0.81140351 0.67836257 0.80994152]
|
|
|
|
mean value: 0.6997076023391813
|
|
|
|
key: train_roc_auc
|
|
value: [0.91017964 0.90718563 0.91941118 0.90741731 0.91939336 0.892572
|
|
0.9134588 0.892572 0.90743513 0.88957799]
|
|
|
|
mean value: 0.9059203022526375
|
|
|
|
key: test_jcc
|
|
value: [0.52 0.42307692 0.58333333 0.38461538 0.59259259 0.55
|
|
0.48 0.68181818 0.47826087 0.69565217]
|
|
|
|
mean value: 0.5389349458914676
|
|
|
|
key: train_jcc
|
|
value: [0.83606557 0.83243243 0.85082873 0.83333333 0.85164835 0.80434783
|
|
0.84153005 0.80748663 0.82872928 0.80213904]
|
|
|
|
mean value: 0.8288541251415297
|
|
|
|
MCC on Blind test: 0.52
|
|
|
|
Accuracy on Blind test: 0.76
|
|
|
|
Model_name: Multinomial
|
|
Model func: MultinomialNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MultinomialNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01347089 0.0098455 0.01079345 0.00932312 0.01048756 0.0106535
|
|
0.00964189 0.00965834 0.01051116 0.00936913]
|
|
|
|
mean value: 0.010375452041625977
|
|
|
|
key: score_time
|
|
value: [0.01401138 0.00987387 0.00918579 0.00872779 0.00984526 0.00893331
|
|
0.00875735 0.0098927 0.00953364 0.00865412]
|
|
|
|
mean value: 0.009741520881652832
|
|
|
|
key: test_mcc
|
|
value: [0.53300179 0.10660036 0.35104619 0.39648395 0.39648395 0.20189884
|
|
0.24269006 0.56725146 0.29766651 0.51793973]
|
|
|
|
mean value: 0.361106284731234
|
|
|
|
key: train_mcc
|
|
value: [0.41044367 0.45305024 0.40692678 0.37777206 0.39938487 0.44020203
|
|
0.43583391 0.39407361 0.38777849 0.44285443]
|
|
|
|
mean value: 0.4148320088648423
|
|
|
|
key: test_accuracy
|
|
value: [0.76315789 0.55263158 0.67567568 0.67567568 0.67567568 0.59459459
|
|
0.62162162 0.78378378 0.64864865 0.75675676]
|
|
|
|
mean value: 0.6748221906116643
|
|
|
|
key: train_accuracy
|
|
value: [0.70359281 0.7245509 0.70149254 0.68656716 0.69850746 0.71940299
|
|
0.71641791 0.69552239 0.69253731 0.71940299]
|
|
|
|
mean value: 0.7057994458843507
|
|
|
|
key: test_fscore
|
|
value: [0.7804878 0.58536585 0.64705882 0.72727273 0.72727273 0.63414634
|
|
0.63157895 0.78947368 0.68292683 0.7804878 ]
|
|
|
|
mean value: 0.6986071543800155
|
|
|
|
key: train_fscore
|
|
value: [0.72112676 0.74157303 0.72222222 0.7107438 0.71549296 0.73142857
|
|
0.73087819 0.71186441 0.7082153 0.73595506]
|
|
|
|
mean value: 0.7229500294700111
|
|
|
|
key: test_precision
|
|
value: [0.72727273 0.54545455 0.6875 0.61538462 0.61538462 0.56521739
|
|
0.63157895 0.78947368 0.63636364 0.72727273]
|
|
|
|
mean value: 0.6540902890016163
|
|
|
|
key: train_precision
|
|
value: [0.68085106 0.6984127 0.67708333 0.66153846 0.67914439 0.7032967
|
|
0.69354839 0.67379679 0.67204301 0.69312169]
|
|
|
|
mean value: 0.6832836527852728
|
|
|
|
key: test_recall
|
|
value: [0.84210526 0.63157895 0.61111111 0.88888889 0.88888889 0.72222222
|
|
0.63157895 0.78947368 0.73684211 0.84210526]
|
|
|
|
mean value: 0.7584795321637426
|
|
|
|
key: train_recall
|
|
value: [0.76646707 0.79041916 0.77380952 0.76785714 0.75595238 0.76190476
|
|
0.77245509 0.75449102 0.74850299 0.78443114]
|
|
|
|
mean value: 0.7676290276589678
|
|
|
|
key: test_roc_auc
|
|
value: [0.76315789 0.55263158 0.67397661 0.68128655 0.68128655 0.59795322
|
|
0.62134503 0.78362573 0.64619883 0.75438596]
|
|
|
|
mean value: 0.6755847953216374
|
|
|
|
key: train_roc_auc
|
|
value: [0.70359281 0.7245509 0.70127602 0.68632378 0.69833547 0.71927573
|
|
0.71658469 0.69569789 0.69270388 0.71959652]
|
|
|
|
mean value: 0.7057937696036498
|
|
|
|
key: test_jcc
|
|
value: [0.64 0.4137931 0.47826087 0.57142857 0.57142857 0.46428571
|
|
0.46153846 0.65217391 0.51851852 0.64 ]
|
|
|
|
mean value: 0.5411427723256809
|
|
|
|
key: train_jcc
|
|
value: [0.56387665 0.58928571 0.56521739 0.55128205 0.55701754 0.57657658
|
|
0.57589286 0.55263158 0.54824561 0.58222222]
|
|
|
|
mean value: 0.5662248201638254
|
|
|
|
MCC on Blind test: 0.4
|
|
|
|
Accuracy on Blind test: 0.71
|
|
|
|
Model_name: Passive Aggresive
|
|
Model func: PassiveAggressiveClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
PassiveAggressiveClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01478553 0.01501584 0.01634312 0.01609015 0.02087545 0.01536274
|
|
0.01886725 0.01932836 0.025141 0.01783991]
|
|
|
|
mean value: 0.017964935302734374
|
|
|
|
key: score_time
|
|
value: [0.00964785 0.01131296 0.01162481 0.0114944 0.01235485 0.01182532
|
|
0.01240325 0.01643777 0.01251268 0.01244211]
|
|
|
|
mean value: 0.01220560073852539
|
|
|
|
key: test_mcc
|
|
value: [0.51639778 0.25819889 0.24559613 0.36315314 0.29975701 0.45644817
|
|
0.35104619 0.67434178 0.51461988 0.42820746]
|
|
|
|
mean value: 0.4107766453870688
|
|
|
|
key: train_mcc
|
|
value: [0.47102886 0.49988189 0.3513147 0.75054607 0.59875156 0.59156045
|
|
0.69594882 0.50616864 0.69082719 0.34258615]
|
|
|
|
mean value: 0.549861433159125
|
|
|
|
key: test_accuracy
|
|
value: [0.71052632 0.60526316 0.56756757 0.67567568 0.62162162 0.7027027
|
|
0.67567568 0.81081081 0.75675676 0.64864865]
|
|
|
|
mean value: 0.677524893314367
|
|
|
|
key: train_accuracy
|
|
value: [0.68862275 0.70658683 0.60895522 0.87462687 0.7761194 0.77313433
|
|
0.83880597 0.70447761 0.8358209 0.60597015]
|
|
|
|
mean value: 0.7413120028599517
|
|
|
|
key: test_fscore
|
|
value: [0.7755102 0.69387755 0.2 0.7 0.69565217 0.59259259
|
|
0.7 0.77419355 0.75675676 0.48 ]
|
|
|
|
mean value: 0.636858282675153
|
|
|
|
key: train_fscore
|
|
value: [0.76036866 0.77102804 0.36097561 0.87861272 0.81296758 0.72058824
|
|
0.85483871 0.5787234 0.85254692 0.34653465]
|
|
|
|
mean value: 0.6937184528126416
|
|
|
|
key: test_precision
|
|
value: [0.63333333 0.56666667 1. 0.63636364 0.57142857 0.88888889
|
|
0.66666667 1. 0.77777778 1. ]
|
|
|
|
mean value: 0.7741125541125541
|
|
|
|
key: train_precision
|
|
value: [0.61797753 0.63218391 1. 0.85393258 0.69957082 0.94230769
|
|
0.77560976 1. 0.77184466 1. ]
|
|
|
|
mean value: 0.82934269444556
|
|
|
|
key: test_recall
|
|
value: [1. 0.89473684 0.11111111 0.77777778 0.88888889 0.44444444
|
|
0.73684211 0.63157895 0.73684211 0.31578947]
|
|
|
|
mean value: 0.6538011695906433
|
|
|
|
key: train_recall
|
|
value: [0.98802395 0.98802395 0.2202381 0.9047619 0.9702381 0.58333333
|
|
0.95209581 0.40718563 0.95209581 0.20958084]
|
|
|
|
mean value: 0.717557741659538
|
|
|
|
key: test_roc_auc
|
|
value: [0.71052632 0.60526316 0.55555556 0.67836257 0.62865497 0.69590643
|
|
0.67397661 0.81578947 0.75730994 0.65789474]
|
|
|
|
mean value: 0.6779239766081872
|
|
|
|
key: train_roc_auc
|
|
value: [0.68862275 0.70658683 0.61011905 0.87453664 0.77553821 0.77370259
|
|
0.83914314 0.70359281 0.83616695 0.60479042]
|
|
|
|
mean value: 0.7412799401197605
|
|
|
|
key: test_jcc
|
|
value: [0.63333333 0.53125 0.11111111 0.53846154 0.53333333 0.42105263
|
|
0.53846154 0.63157895 0.60869565 0.31578947]
|
|
|
|
mean value: 0.48630675595063466
|
|
|
|
key: train_jcc
|
|
value: [0.6133829 0.62737643 0.2202381 0.78350515 0.68487395 0.56321839
|
|
0.74647887 0.40718563 0.74299065 0.20958084]
|
|
|
|
mean value: 0.5598830910256378
|
|
|
|
MCC on Blind test: 0.52
|
|
|
|
Accuracy on Blind test: 0.75
|
|
|
|
Model_name: Stochastic GDescent
|
|
Model func: SGDClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SGDClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01921082 0.01736236 0.0253129 0.01849151 0.0182991 0.01945162
|
|
0.01927853 0.01889205 0.01764226 0.01667428]
|
|
|
|
mean value: 0.01906154155731201
|
|
|
|
key: score_time
|
|
value: [0.01197815 0.01276588 0.01220345 0.01197982 0.01197791 0.01204348
|
|
0.01165199 0.01158023 0.01158094 0.01265383]
|
|
|
|
mean value: 0.012041568756103516
|
|
|
|
key: test_mcc
|
|
value: [0.5976143 0.52704628 0.45906433 0.30384671 0.24850835 0.35769558
|
|
0.47328975 0.75938069 0.56725146 0.58342636]
|
|
|
|
mean value: 0.48771238204603873
|
|
|
|
key: train_mcc
|
|
value: [0.62757163 0.69823795 0.74077144 0.7611919 0.44317649 0.27696763
|
|
0.74341426 0.61820368 0.7033603 0.65751611]
|
|
|
|
mean value: 0.6270411379306355
|
|
|
|
key: test_accuracy
|
|
value: [0.76315789 0.76315789 0.72972973 0.64864865 0.59459459 0.62162162
|
|
0.72972973 0.86486486 0.78378378 0.78378378]
|
|
|
|
mean value: 0.7283072546230441
|
|
|
|
key: train_accuracy
|
|
value: [0.78742515 0.84730539 0.86865672 0.88059701 0.66865672 0.57014925
|
|
0.87164179 0.7880597 0.85074627 0.8238806 ]
|
|
|
|
mean value: 0.7957118598623648
|
|
|
|
key: test_fscore
|
|
value: [0.80851064 0.76923077 0.72222222 0.66666667 0.68085106 0.36363636
|
|
0.70588235 0.84848485 0.78947368 0.76470588]
|
|
|
|
mean value: 0.7119664491873173
|
|
|
|
key: train_fscore
|
|
value: [0.82294264 0.85470085 0.8625 0.88095238 0.7505618 0.25
|
|
0.87240356 0.73992674 0.85549133 0.80655738]
|
|
|
|
mean value: 0.7696036684084115
|
|
|
|
key: test_precision
|
|
value: [0.67857143 0.75 0.72222222 0.61904762 0.55172414 1.
|
|
0.8 1. 0.78947368 0.86666667]
|
|
|
|
mean value: 0.7777705758649498
|
|
|
|
key: train_precision
|
|
value: [0.70512821 0.81521739 0.90789474 0.88095238 0.60288809 1.
|
|
0.86470588 0.95283019 0.82681564 0.89130435]
|
|
|
|
mean value: 0.8447736862186013
|
|
|
|
key: test_recall
|
|
value: [1. 0.78947368 0.72222222 0.72222222 0.88888889 0.22222222
|
|
0.63157895 0.73684211 0.78947368 0.68421053]
|
|
|
|
mean value: 0.7187134502923976
|
|
|
|
key: train_recall
|
|
value: [0.98802395 0.89820359 0.82142857 0.88095238 0.99404762 0.14285714
|
|
0.88023952 0.60479042 0.88622754 0.73652695]
|
|
|
|
mean value: 0.7833297690333618
|
|
|
|
key: test_roc_auc
|
|
value: [0.76315789 0.76315789 0.72953216 0.6505848 0.60233918 0.61111111
|
|
0.73245614 0.86842105 0.78362573 0.78654971]
|
|
|
|
mean value: 0.729093567251462
|
|
|
|
key: train_roc_auc
|
|
value: [0.78742515 0.84730539 0.86879812 0.88059595 0.66768249 0.57142857
|
|
0.87166738 0.78751426 0.85085187 0.82362062]
|
|
|
|
mean value: 0.7956889791844881
|
|
|
|
key: test_jcc
|
|
value: [0.67857143 0.625 0.56521739 0.5 0.51612903 0.22222222
|
|
0.54545455 0.73684211 0.65217391 0.61904762]
|
|
|
|
mean value: 0.5660658257164863
|
|
|
|
key: train_jcc
|
|
value: [0.69915254 0.74626866 0.75824176 0.78723404 0.60071942 0.14285714
|
|
0.77368421 0.5872093 0.74747475 0.67582418]
|
|
|
|
mean value: 0.6518666003352644
|
|
|
|
MCC on Blind test: 0.53
|
|
|
|
Accuracy on Blind test: 0.76
|
|
|
|
Model_name: AdaBoost Classifier
|
|
Model func: AdaBoostClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', AdaBoostClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.1658709 0.1518445 0.14408374 0.14378572 0.17664433 0.14879799
|
|
0.14740419 0.17675805 0.14548826 0.14485359]
|
|
|
|
mean value: 0.15455312728881837
|
|
|
|
key: score_time
|
|
value: [0.01742864 0.01529813 0.01540756 0.01533318 0.01529312 0.01535153
|
|
0.01789331 0.01594448 0.01528716 0.01537848]
|
|
|
|
mean value: 0.01586155891418457
|
|
|
|
key: test_mcc
|
|
value: [0.63960215 0.57894737 0.62170355 0.47328975 0.30384671 0.89679028
|
|
0.56725146 0.73020842 0.6754386 0.69356297]
|
|
|
|
mean value: 0.6180641255697805
|
|
|
|
key: train_mcc
|
|
value: [0.96407186 0.97021644 0.94029798 0.94641713 0.96424625 0.95836453
|
|
0.94642285 0.95230797 0.92842631 0.95836453]
|
|
|
|
mean value: 0.9529135841114293
|
|
|
|
key: test_accuracy
|
|
value: [0.81578947 0.78947368 0.81081081 0.72972973 0.64864865 0.94594595
|
|
0.78378378 0.86486486 0.83783784 0.83783784]
|
|
|
|
mean value: 0.8064722617354196
|
|
|
|
key: train_accuracy
|
|
value: [0.98203593 0.98502994 0.97014925 0.97313433 0.98208955 0.97910448
|
|
0.97313433 0.9761194 0.9641791 0.97910448]
|
|
|
|
mean value: 0.9764080793636607
|
|
|
|
key: test_fscore
|
|
value: [0.8 0.78947368 0.8 0.75 0.66666667 0.94117647
|
|
0.78947368 0.87179487 0.84210526 0.82352941]
|
|
|
|
mean value: 0.8074220052393427
|
|
|
|
key: train_fscore
|
|
value: [0.98203593 0.9851632 0.9702381 0.97345133 0.98224852 0.97897898
|
|
0.97329377 0.97619048 0.96428571 0.97922849]
|
|
|
|
mean value: 0.9765114500921317
|
|
|
|
key: test_precision
|
|
value: [0.875 0.78947368 0.82352941 0.68181818 0.61904762 1.
|
|
0.78947368 0.85 0.84210526 0.93333333]
|
|
|
|
mean value: 0.8203781177542787
|
|
|
|
key: train_precision
|
|
value: [0.98203593 0.97647059 0.9702381 0.96491228 0.97647059 0.98787879
|
|
0.96470588 0.9704142 0.95857988 0.97058824]
|
|
|
|
mean value: 0.9722294468920234
|
|
|
|
key: test_recall
|
|
value: [0.73684211 0.78947368 0.77777778 0.83333333 0.72222222 0.88888889
|
|
0.78947368 0.89473684 0.84210526 0.73684211]
|
|
|
|
mean value: 0.8011695906432749
|
|
|
|
key: train_recall
|
|
value: [0.98203593 0.99401198 0.9702381 0.98214286 0.98809524 0.9702381
|
|
0.98203593 0.98203593 0.97005988 0.98802395]
|
|
|
|
mean value: 0.9808917878528657
|
|
|
|
key: test_roc_auc
|
|
value: [0.81578947 0.78947368 0.80994152 0.73245614 0.6505848 0.94444444
|
|
0.78362573 0.86403509 0.8377193 0.84064327]
|
|
|
|
mean value: 0.8068713450292397
|
|
|
|
key: train_roc_auc
|
|
value: [0.98203593 0.98502994 0.97014899 0.97310736 0.98207157 0.97913102
|
|
0.97316082 0.97613701 0.96419661 0.97913102]
|
|
|
|
mean value: 0.9764150270886798
|
|
|
|
key: test_jcc
|
|
value: [0.66666667 0.65217391 0.66666667 0.6 0.5 0.88888889
|
|
0.65217391 0.77272727 0.72727273 0.7 ]
|
|
|
|
mean value: 0.6826570048309178
|
|
|
|
key: train_jcc
|
|
value: [0.96470588 0.97076023 0.94219653 0.94827586 0.96511628 0.95882353
|
|
0.94797688 0.95348837 0.93103448 0.95930233]
|
|
|
|
mean value: 0.9541680377659231
|
|
|
|
MCC on Blind test: 0.65
|
|
|
|
Accuracy on Blind test: 0.82
|
|
|
|
Model_name: Bagging Classifier
|
|
Model func: BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
BaggingClassifier(n_jobs=10, oob_score=True,
|
|
random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.06761193 0.06841898 0.0912056 0.07453728 0.06201434 0.07745528
|
|
0.07187891 0.08265615 0.04212904 0.0562861 ]
|
|
|
|
mean value: 0.06941936016082764
|
|
|
|
key: score_time
|
|
value: [0.02392149 0.03674245 0.0177021 0.01945925 0.02493858 0.01984835
|
|
0.01979136 0.03673816 0.01775742 0.01745152]
|
|
|
|
mean value: 0.023435068130493165
|
|
|
|
key: test_mcc
|
|
value: [0.58218174 0.68803296 0.40469382 0.40469382 0.30384671 0.73020842
|
|
0.53638795 0.62280702 0.62280702 0.69356297]
|
|
|
|
mean value: 0.558922242339767
|
|
|
|
key: train_mcc
|
|
value: [0.98205353 0.96469469 0.97030621 0.96445635 0.96445635 0.97030621
|
|
0.95863214 0.97057893 0.95230457 0.95863214]
|
|
|
|
mean value: 0.9656421131273293
|
|
|
|
key: test_accuracy
|
|
value: [0.78947368 0.84210526 0.7027027 0.7027027 0.64864865 0.86486486
|
|
0.75675676 0.81081081 0.81081081 0.83783784]
|
|
|
|
mean value: 0.7766714082503556
|
|
|
|
key: train_accuracy
|
|
value: [0.99101796 0.98203593 0.98507463 0.98208955 0.98208955 0.98507463
|
|
0.97910448 0.98507463 0.9761194 0.97910448]
|
|
|
|
mean value: 0.9826785235499151
|
|
|
|
key: test_fscore
|
|
value: [0.77777778 0.83333333 0.68571429 0.68571429 0.66666667 0.85714286
|
|
0.72727273 0.81081081 0.81081081 0.82352941]
|
|
|
|
mean value: 0.767877296700826
|
|
|
|
key: train_fscore
|
|
value: [0.99099099 0.98170732 0.98498498 0.98192771 0.98192771 0.98498498
|
|
0.9787234 0.98480243 0.97590361 0.9787234 ]
|
|
|
|
mean value: 0.9824676554300291
|
|
|
|
key: test_precision
|
|
value: [0.82352941 0.88235294 0.70588235 0.70588235 0.61904762 0.88235294
|
|
0.85714286 0.83333333 0.83333333 0.93333333]
|
|
|
|
mean value: 0.8076190476190477
|
|
|
|
key: train_precision
|
|
value: [0.9939759 1. 0.99393939 0.99390244 0.99390244 0.99393939
|
|
0.99382716 1. 0.98181818 0.99382716]
|
|
|
|
mean value: 0.9939132072347863
|
|
|
|
key: test_recall
|
|
value: [0.73684211 0.78947368 0.66666667 0.66666667 0.72222222 0.83333333
|
|
0.63157895 0.78947368 0.78947368 0.73684211]
|
|
|
|
mean value: 0.7362573099415205
|
|
|
|
key: train_recall
|
|
value: [0.98802395 0.96407186 0.97619048 0.9702381 0.9702381 0.97619048
|
|
0.96407186 0.97005988 0.97005988 0.96407186]
|
|
|
|
mean value: 0.9713216424294269
|
|
|
|
key: test_roc_auc
|
|
value: [0.78947368 0.84210526 0.70175439 0.70175439 0.6505848 0.86403509
|
|
0.76023392 0.81140351 0.81140351 0.84064327]
|
|
|
|
mean value: 0.7773391812865498
|
|
|
|
key: train_roc_auc
|
|
value: [0.99101796 0.98203593 0.98510123 0.98212504 0.98212504 0.98510123
|
|
0.97905974 0.98502994 0.97610137 0.97905974]
|
|
|
|
mean value: 0.9826757199885943
|
|
|
|
key: test_jcc
|
|
value: [0.63636364 0.71428571 0.52173913 0.52173913 0.5 0.75
|
|
0.57142857 0.68181818 0.68181818 0.7 ]
|
|
|
|
mean value: 0.627919254658385
|
|
|
|
key: train_jcc
|
|
value: [0.98214286 0.96407186 0.9704142 0.96449704 0.96449704 0.9704142
|
|
0.95833333 0.97005988 0.95294118 0.95833333]
|
|
|
|
mean value: 0.9655704922014159
|
|
|
|
MCC on Blind test: 0.58
|
|
|
|
Accuracy on Blind test: 0.79
|
|
|
|
Model_name: Gaussian Process
|
|
Model func: GaussianProcessClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianProcessClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.08480191 0.12193632 0.09195971 0.09195113 0.08902097 0.0901897
|
|
0.09894681 0.12061763 0.13100004 0.12856603]
|
|
|
|
mean value: 0.10489902496337891
|
|
|
|
key: score_time
|
|
value: [0.02624893 0.02163124 0.02177572 0.02177191 0.02813363 0.02941966
|
|
0.0259738 0.02178097 0.02193069 0.02302551]
|
|
|
|
mean value: 0.024169206619262695
|
|
|
|
key: test_mcc
|
|
value: [0.42163702 0.37047929 0.35104619 0.45906433 0.24269006 0.51793973
|
|
0.36315314 0.51319869 0.40780312 0.51319869]
|
|
|
|
mean value: 0.4160210256714056
|
|
|
|
key: train_mcc
|
|
value: [0.97007727 0.9760479 0.97611919 0.97016681 0.98813046 0.97016681
|
|
0.98813046 0.98224601 0.97016575 0.97016575]
|
|
|
|
mean value: 0.9761416407044281
|
|
|
|
key: test_accuracy
|
|
value: [0.71052632 0.68421053 0.67567568 0.72972973 0.62162162 0.75675676
|
|
0.67567568 0.75675676 0.7027027 0.75675676]
|
|
|
|
mean value: 0.7070412517780938
|
|
|
|
key: train_accuracy
|
|
value: [0.98502994 0.98802395 0.9880597 0.98507463 0.99402985 0.98507463
|
|
0.99402985 0.99104478 0.98507463 0.98507463]
|
|
|
|
mean value: 0.9880516578782733
|
|
|
|
key: test_fscore
|
|
value: [0.71794872 0.66666667 0.64705882 0.72222222 0.61111111 0.72727273
|
|
0.64705882 0.76923077 0.73170732 0.76923077]
|
|
|
|
mean value: 0.7009507947814978
|
|
|
|
key: train_fscore
|
|
value: [0.98498498 0.98802395 0.98809524 0.98507463 0.99401198 0.98507463
|
|
0.99404762 0.99093656 0.98498498 0.98498498]
|
|
|
|
mean value: 0.9880219549864107
|
|
|
|
key: test_precision
|
|
value: [0.7 0.70588235 0.6875 0.72222222 0.61111111 0.8
|
|
0.73333333 0.75 0.68181818 0.75 ]
|
|
|
|
mean value: 0.7141867201426025
|
|
|
|
key: train_precision
|
|
value: [0.98795181 0.98802395 0.98809524 0.98802395 1. 0.98802395
|
|
0.98816568 1. 0.98795181 0.98795181]
|
|
|
|
mean value: 0.9904188196542782
|
|
|
|
key: test_recall
|
|
value: [0.73684211 0.63157895 0.61111111 0.72222222 0.61111111 0.66666667
|
|
0.57894737 0.78947368 0.78947368 0.78947368]
|
|
|
|
mean value: 0.6926900584795321
|
|
|
|
key: train_recall
|
|
value: [0.98203593 0.98802395 0.98809524 0.98214286 0.98809524 0.98214286
|
|
1. 0.98203593 0.98203593 0.98203593]
|
|
|
|
mean value: 0.9856643855146849
|
|
|
|
key: test_roc_auc
|
|
value: [0.71052632 0.68421053 0.67397661 0.72953216 0.62134503 0.75438596
|
|
0.67836257 0.75584795 0.7002924 0.75584795]
|
|
|
|
mean value: 0.7064327485380117
|
|
|
|
key: train_roc_auc
|
|
value: [0.98502994 0.98802395 0.9880596 0.9850834 0.99404762 0.9850834
|
|
0.99404762 0.99101796 0.98506558 0.98506558]
|
|
|
|
mean value: 0.9880524664955803
|
|
|
|
key: test_jcc
|
|
value: [0.56 0.5 0.47826087 0.56521739 0.44 0.57142857
|
|
0.47826087 0.625 0.57692308 0.625 ]
|
|
|
|
mean value: 0.542009077878643
|
|
|
|
key: train_jcc
|
|
value: [0.9704142 0.97633136 0.97647059 0.97058824 0.98809524 0.97058824
|
|
0.98816568 0.98203593 0.9704142 0.9704142 ]
|
|
|
|
mean value: 0.9763517870032894
|
|
|
|
MCC on Blind test: 0.31
|
|
|
|
Accuracy on Blind test: 0.66
|
|
|
|
Model_name: Gradient Boosting
|
|
Model func: GradientBoostingClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GradientBoostingClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.58192515 0.55447221 0.55259275 0.55491734 0.55426311 0.55481744
|
|
0.54255867 0.58045793 0.60405684 0.59816074]
|
|
|
|
mean value: 0.5678222179412842
|
|
|
|
key: score_time
|
|
value: [0.00941396 0.00987577 0.01007271 0.00946283 0.00943446 0.01059031
|
|
0.00934267 0.01484299 0.00936031 0.0098803 ]
|
|
|
|
mean value: 0.010227632522583009
|
|
|
|
key: test_mcc
|
|
value: [0.76376262 0.58218174 0.51461988 0.40643275 0.36315314 0.73099415
|
|
0.58342636 0.73020842 0.78362573 0.74044197]
|
|
|
|
mean value: 0.6198846763908076
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 0.99404741
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.999404740751082
|
|
|
|
key: test_accuracy
|
|
value: [0.86842105 0.78947368 0.75675676 0.7027027 0.67567568 0.86486486
|
|
0.78378378 0.86486486 0.89189189 0.86486486]
|
|
|
|
mean value: 0.8063300142247511
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 0.99701493
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9997014925373134
|
|
|
|
key: test_fscore
|
|
value: [0.84848485 0.77777778 0.75675676 0.7027027 0.7 0.86486486
|
|
0.76470588 0.87179487 0.89473684 0.85714286]
|
|
|
|
mean value: 0.8038967403982884
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 0.99703264
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9997032640949555
|
|
|
|
key: test_precision
|
|
value: [1. 0.82352941 0.73684211 0.68421053 0.63636364 0.84210526
|
|
0.86666667 0.85 0.89473684 0.9375 ]
|
|
|
|
mean value: 0.8271954451637114
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 0.99408284
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9994082840236687
|
|
|
|
key: test_recall
|
|
value: [0.73684211 0.73684211 0.77777778 0.72222222 0.77777778 0.88888889
|
|
0.68421053 0.89473684 0.89473684 0.78947368]
|
|
|
|
mean value: 0.7903508771929825
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.86842105 0.78947368 0.75730994 0.70321637 0.67836257 0.86549708
|
|
0.78654971 0.86403509 0.89181287 0.86695906]
|
|
|
|
mean value: 0.8071637426900585
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 0.99700599
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9997005988023953
|
|
|
|
key: test_jcc
|
|
value: [0.73684211 0.63636364 0.60869565 0.54166667 0.53846154 0.76190476
|
|
0.61904762 0.77272727 0.80952381 0.75 ]
|
|
|
|
mean value: 0.6775233062132375
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 0.99408284
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9994082840236687
|
|
|
|
MCC on Blind test: 0.79
|
|
|
|
Accuracy on Blind test: 0.89
|
|
|
|
Model_name: QDA
|
|
Model func: QuadraticDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', QuadraticDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.02679706 0.02562833 0.02642989 0.02610731 0.02577448 0.02580571
|
|
0.03485513 0.05354261 0.03805089 0.03192067]
|
|
|
|
mean value: 0.031491208076477054
|
|
|
|
key: score_time
|
|
value: [0.01956224 0.02009726 0.01275182 0.01610994 0.01711559 0.01675367
|
|
0.02614713 0.02827811 0.01272583 0.01298022]
|
|
|
|
mean value: 0.018252182006835937
|
|
|
|
key: test_mcc
|
|
value: [ 0.47368421 0.22645541 0.62807634 0.45906433 -0.02109391 0.57184997
|
|
0.39648395 0.37654316 0.31339521 0.44331728]
|
|
|
|
mean value: 0.38677759448904186
|
|
|
|
key: train_mcc
|
|
value: [0.8982197 0.9589266 0.94198829 0.92240242 0.85882568 0.82916187
|
|
0.88026879 0.86415116 0.8521016 0.90860677]
|
|
|
|
mean value: 0.8914652895956063
|
|
|
|
key: test_accuracy
|
|
value: [0.73684211 0.60526316 0.81081081 0.72972973 0.48648649 0.78378378
|
|
0.67567568 0.67567568 0.64864865 0.7027027 ]
|
|
|
|
mean value: 0.6855618776671408
|
|
|
|
key: train_accuracy
|
|
value: [0.9491018 0.97904192 0.97014925 0.96119403 0.92537313 0.90746269
|
|
0.93731343 0.92835821 0.92238806 0.95223881]
|
|
|
|
mean value: 0.9432621324515149
|
|
|
|
key: test_fscore
|
|
value: [0.73684211 0.51612903 0.78787879 0.72222222 0.53658537 0.78947368
|
|
0.6 0.625 0.60606061 0.64516129]
|
|
|
|
mean value: 0.6565353094069604
|
|
|
|
key: train_fscore
|
|
value: [0.94894895 0.97859327 0.96932515 0.96142433 0.93036212 0.91553134
|
|
0.93333333 0.92307692 0.91666667 0.94968553]
|
|
|
|
mean value: 0.9426947616648275
|
|
|
|
key: test_precision
|
|
value: [0.73684211 0.66666667 0.86666667 0.72222222 0.47826087 0.75
|
|
0.81818182 0.76923077 0.71428571 0.83333333]
|
|
|
|
mean value: 0.7355690165415566
|
|
|
|
key: train_precision
|
|
value: [0.95180723 1. 1. 0.95857988 0.87434555 0.84422111
|
|
0.99324324 0.99310345 0.9862069 1. ]
|
|
|
|
mean value: 0.9601507353909154
|
|
|
|
key: test_recall
|
|
value: [0.73684211 0.42105263 0.72222222 0.72222222 0.61111111 0.83333333
|
|
0.47368421 0.52631579 0.52631579 0.52631579]
|
|
|
|
mean value: 0.6099415204678362
|
|
|
|
key: train_recall
|
|
value: [0.94610778 0.95808383 0.94047619 0.96428571 0.99404762 1.
|
|
0.88023952 0.86227545 0.85628743 0.90419162]
|
|
|
|
mean value: 0.9305995152552039
|
|
|
|
key: test_roc_auc
|
|
value: [0.73684211 0.60526316 0.80847953 0.72953216 0.48976608 0.78508772
|
|
0.68128655 0.67982456 0.65204678 0.70760234]
|
|
|
|
mean value: 0.6875730994152047
|
|
|
|
key: train_roc_auc
|
|
value: [0.9491018 0.97904192 0.9702381 0.96118477 0.92516752 0.90718563
|
|
0.93714357 0.92816153 0.92219133 0.95209581]
|
|
|
|
mean value: 0.9431511976047905
|
|
|
|
key: test_jcc
|
|
value: [0.58333333 0.34782609 0.65 0.56521739 0.36666667 0.65217391
|
|
0.42857143 0.45454545 0.43478261 0.47619048]
|
|
|
|
mean value: 0.49593073593073594
|
|
|
|
key: train_jcc
|
|
value: [0.90285714 0.95808383 0.94047619 0.92571429 0.86979167 0.84422111
|
|
0.875 0.85714286 0.84615385 0.90419162]
|
|
|
|
mean value: 0.8923632543640423
|
|
|
|
MCC on Blind test: 0.12
|
|
|
|
Accuracy on Blind test: 0.55
|
|
|
|
Model_name: Ridge Classifier
|
|
Model func: RidgeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01501822 0.01518178 0.01479936 0.02877498 0.03405762 0.03440952
|
|
0.03562093 0.03669286 0.0150733 0.01502323]
|
|
|
|
mean value: 0.024465179443359374
|
|
|
|
key: score_time
|
|
value: [0.0121944 0.01211214 0.01216698 0.01837492 0.03033447 0.02411437
|
|
0.01819682 0.01674509 0.0121944 0.01219988]
|
|
|
|
mean value: 0.016863346099853516
|
|
|
|
key: test_mcc
|
|
value: [0.42640143 0.31622777 0.51461988 0.18768409 0.36315314 0.51793973
|
|
0.47328975 0.7888597 0.47328975 0.62280702]
|
|
|
|
mean value: 0.4684272265294213
|
|
|
|
key: train_mcc
|
|
value: [0.69472288 0.74263481 0.71941831 0.78528137 0.76748344 0.71385788
|
|
0.77319551 0.72559716 0.7373543 0.73737302]
|
|
|
|
mean value: 0.7396918675703859
|
|
|
|
key: test_accuracy
|
|
value: [0.71052632 0.65789474 0.75675676 0.59459459 0.67567568 0.75675676
|
|
0.72972973 0.89189189 0.72972973 0.81081081]
|
|
|
|
mean value: 0.7314366998577525
|
|
|
|
key: train_accuracy
|
|
value: [0.84730539 0.87125749 0.85970149 0.89253731 0.88358209 0.85671642
|
|
0.88656716 0.86268657 0.86865672 0.86865672]
|
|
|
|
mean value: 0.8697667351863437
|
|
|
|
key: test_fscore
|
|
value: [0.73170732 0.64864865 0.75675676 0.57142857 0.7 0.72727273
|
|
0.70588235 0.88888889 0.70588235 0.81081081]
|
|
|
|
mean value: 0.7247278426761927
|
|
|
|
key: train_fscore
|
|
value: /home/tanu/git/LSHTM_analysis/scripts/ml/./pnca_cd_8020.py:156: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
ros_CT.sort_values(by = ['test_mcc'], ascending = False, inplace = True)
|
|
/home/tanu/git/LSHTM_analysis/scripts/ml/./pnca_cd_8020.py:159: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
ros_BT.sort_values(by = ['bts_mcc'], ascending = False, inplace = True)
|
|
[0.84866469 0.87240356 0.85970149 0.89411765 0.8856305 0.85964912
|
|
0.88690476 0.86390533 0.86746988 0.86904762]
|
|
|
|
mean value: 0.870749459610928
|
|
|
|
key: test_precision
|
|
value: [0.68181818 0.66666667 0.73684211 0.58823529 0.63636364 0.8
|
|
0.8 0.94117647 0.8 0.83333333]
|
|
|
|
mean value: 0.7484435688150859
|
|
|
|
key: train_precision
|
|
value: [0.84117647 0.86470588 0.86227545 0.88372093 0.87283237 0.84482759
|
|
0.8816568 0.85380117 0.87272727 0.86390533]
|
|
|
|
mean value: 0.8641629260920055
|
|
|
|
key: test_recall
|
|
value: [0.78947368 0.63157895 0.77777778 0.55555556 0.77777778 0.66666667
|
|
0.63157895 0.84210526 0.63157895 0.78947368]
|
|
|
|
mean value: 0.7093567251461989
|
|
|
|
key: train_recall
|
|
value: [0.85628743 0.88023952 0.85714286 0.9047619 0.89880952 0.875
|
|
0.89221557 0.8742515 0.86227545 0.8742515 ]
|
|
|
|
mean value: 0.8775235243798118
|
|
|
|
key: test_roc_auc
|
|
value: [0.71052632 0.65789474 0.75730994 0.59356725 0.67836257 0.75438596
|
|
0.73245614 0.89327485 0.73245614 0.81140351]
|
|
|
|
mean value: 0.7321637426900585
|
|
|
|
key: train_roc_auc
|
|
value: [0.84730539 0.87125749 0.85970915 0.89250071 0.8835365 0.85666168
|
|
0.88658397 0.86272099 0.86863772 0.86867337]
|
|
|
|
mean value: 0.8697586968919304
|
|
|
|
key: test_jcc
|
|
value: [0.57692308 0.48 0.60869565 0.4 0.53846154 0.57142857
|
|
0.54545455 0.8 0.54545455 0.68181818]
|
|
|
|
mean value: 0.5748236111714372
|
|
|
|
key: train_jcc
|
|
value: [0.7371134 0.77368421 0.7539267 0.80851064 0.79473684 0.75384615
|
|
0.79679144 0.76041667 0.76595745 0.76842105]
|
|
|
|
mean value: 0.7713404558365164
|
|
|
|
MCC on Blind test: 0.5
|
|
|
|
Accuracy on Blind test: 0.75
|
|
|
|
Model_name: Ridge ClassifierCV
|
|
Model func: RidgeClassifierCV(cv=10)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifierCV(cv=10))])
|
|
|
|
key: fit_time
|
|
value: [0.23106289 0.24761438 0.16992521 0.18633819 0.15456986 0.26675892
|
|
0.26933575 0.27165508 0.26649928 0.15069866]
|
|
|
|
mean value: 0.22144582271575927
|
|
|
|
key: score_time
|
|
value: [0.02113533 0.01206231 0.02296948 0.02295542 0.01202726 0.01216412
|
|
0.0203042 0.01712108 0.022331 0.01235795]
|
|
|
|
mean value: 0.017542815208435057
|
|
|
|
key: test_mcc
|
|
value: [0.59222009 0.47633051 0.45906433 0.36315314 0.25301653 0.51793973
|
|
0.47328975 0.83918129 0.4633451 0.51319869]
|
|
|
|
mean value: 0.49507391657724253
|
|
|
|
key: train_mcc
|
|
value: [0.62892295 0.65386793 0.64287114 0.66638171 0.70746748 0.71385788
|
|
0.77319551 0.62418887 0.79105329 0.61958014]
|
|
|
|
mean value: 0.6821386891155579
|
|
|
|
key: test_accuracy
|
|
value: [0.78947368 0.73684211 0.72972973 0.67567568 0.62162162 0.75675676
|
|
0.72972973 0.91891892 0.72972973 0.75675676]
|
|
|
|
mean value: 0.7445234708392603
|
|
|
|
key: train_accuracy
|
|
value: [0.81437126 0.82634731 0.82089552 0.83283582 0.85373134 0.85671642
|
|
0.88656716 0.8119403 0.89552239 0.80895522]
|
|
|
|
mean value: 0.8407882741978729
|
|
|
|
key: test_fscore
|
|
value: [0.80952381 0.72222222 0.72222222 0.7 0.65 0.72727273
|
|
0.70588235 0.91891892 0.72222222 0.76923077]
|
|
|
|
mean value: 0.7447495244554068
|
|
|
|
key: train_fscore
|
|
value: [0.81656805 0.83139535 0.8265896 0.8372093 0.85459941 0.85964912
|
|
0.88690476 0.81415929 0.89489489 0.8150289 ]
|
|
|
|
mean value: 0.8436998673780158
|
|
|
|
key: test_precision
|
|
value: [0.73913043 0.76470588 0.72222222 0.63636364 0.59090909 0.8
|
|
0.8 0.94444444 0.76470588 0.75 ]
|
|
|
|
mean value: 0.7512481593427884
|
|
|
|
key: train_precision
|
|
value: [0.80701754 0.8079096 0.80337079 0.81818182 0.85207101 0.84482759
|
|
0.8816568 0.80232558 0.89759036 0.7877095 ]
|
|
|
|
mean value: 0.8302660589983715
|
|
|
|
key: test_recall
|
|
value: [0.89473684 0.68421053 0.72222222 0.77777778 0.72222222 0.66666667
|
|
0.63157895 0.89473684 0.68421053 0.78947368]
|
|
|
|
mean value: 0.7467836257309941
|
|
|
|
key: train_recall
|
|
value: [0.82634731 0.85628743 0.85119048 0.85714286 0.85714286 0.875
|
|
0.89221557 0.82634731 0.89221557 0.84431138]
|
|
|
|
mean value: 0.8578200741374394
|
|
|
|
key: test_roc_auc
|
|
value: [0.78947368 0.73684211 0.72953216 0.67836257 0.62426901 0.75438596
|
|
0.73245614 0.91959064 0.73099415 0.75584795]
|
|
|
|
mean value: 0.7451754385964913
|
|
|
|
key: train_roc_auc
|
|
value: [0.81437126 0.82634731 0.82080482 0.83276305 0.85372113 0.85666168
|
|
0.88658397 0.81198318 0.89551255 0.80906045]
|
|
|
|
mean value: 0.8407809381237525
|
|
|
|
key: test_jcc
|
|
value: [0.68 0.56521739 0.56521739 0.53846154 0.48148148 0.57142857
|
|
0.54545455 0.85 0.56521739 0.625 ]
|
|
|
|
mean value: 0.598747831073918
|
|
|
|
key: train_jcc
|
|
value: [0.69 0.71144279 0.7044335 0.72 0.74611399 0.75384615
|
|
0.79679144 0.68656716 0.80978261 0.68780488]
|
|
|
|
mean value: 0.7306782521863862
|
|
|
|
MCC on Blind test: 0.5
|
|
|
|
Accuracy on Blind test: 0.75
|
|
|
|
Model_name: Logistic Regression
|
|
Model func: LogisticRegression(random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegression(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.03377938 0.03449535 0.03464365 0.03529739 0.03566742 0.03977704
|
|
0.03550386 0.04544282 0.04554224 0.05901814]
|
|
|
|
mean value: 0.03991672992706299
|
|
|
|
key: score_time
|
|
value: [0.01575065 0.01500201 0.01457238 0.01548862 0.01224232 0.01256585
|
|
0.01259995 0.0118897 0.01187158 0.01687217]
|
|
|
|
mean value: 0.01388552188873291
|
|
|
|
key: test_mcc
|
|
value: [0.6778302 0.43041423 0.54812195 0.35983579 0.54812195 0.68826048
|
|
0.13608276 0.40089186 0.13363062 0.6681531 ]
|
|
|
|
mean value: 0.459134295328864
|
|
|
|
key: train_mcc
|
|
value: [0.65830691 0.69459875 0.65095488 0.67365676 0.67274939 0.6877647
|
|
0.70291701 0.66729768 0.74655324 0.67435582]
|
|
|
|
mean value: 0.682915514138051
|
|
|
|
key: test_accuracy
|
|
value: [0.83870968 0.70967742 0.77419355 0.67741935 0.77419355 0.83870968
|
|
0.56666667 0.7 0.56666667 0.83333333]
|
|
|
|
mean value: 0.7279569892473118
|
|
|
|
key: train_accuracy
|
|
value: [0.82909091 0.84727273 0.82545455 0.83636364 0.83636364 0.84363636
|
|
0.85144928 0.83333333 0.87318841 0.83695652]
|
|
|
|
mean value: 0.8413109354413703
|
|
|
|
key: test_fscore
|
|
value: [0.82758621 0.72727273 0.75862069 0.66666667 0.78787879 0.85714286
|
|
0.51851852 0.68965517 0.55172414 0.83870968]
|
|
|
|
mean value: 0.7223775441795464
|
|
|
|
key: train_fscore
|
|
value: [0.83154122 0.84892086 0.82733813 0.83985765 0.83636364 0.84587814
|
|
0.85198556 0.83687943 0.87455197 0.83985765]
|
|
|
|
mean value: 0.843317425001627
|
|
|
|
key: test_precision
|
|
value: [0.85714286 0.66666667 0.78571429 0.71428571 0.76470588 0.78947368
|
|
0.58333333 0.71428571 0.57142857 0.8125 ]
|
|
|
|
mean value: 0.725953670942061
|
|
|
|
key: train_precision
|
|
value: [0.82269504 0.84285714 0.82142857 0.81944444 0.83333333 0.83098592
|
|
0.84892086 0.81944444 0.86524823 0.82517483]
|
|
|
|
mean value: 0.832953280289642
|
|
|
|
key: test_recall
|
|
value: [0.8 0.8 0.73333333 0.625 0.8125 0.9375
|
|
0.46666667 0.66666667 0.53333333 0.86666667]
|
|
|
|
mean value: 0.7241666666666666
|
|
|
|
key: train_recall
|
|
value: [0.84057971 0.85507246 0.83333333 0.86131387 0.83941606 0.86131387
|
|
0.85507246 0.85507246 0.88405797 0.85507246]
|
|
|
|
mean value: 0.8540304665185655
|
|
|
|
key: test_roc_auc
|
|
value: [0.8375 0.7125 0.77291667 0.67916667 0.77291667 0.83541667
|
|
0.56666667 0.7 0.56666667 0.83333333]
|
|
|
|
mean value: 0.7277083333333334
|
|
|
|
key: train_roc_auc
|
|
value: [0.82904898 0.84724426 0.82542579 0.83645404 0.8363747 0.84370041
|
|
0.85144928 0.83333333 0.87318841 0.83695652]
|
|
|
|
mean value: 0.8413175711414366
|
|
|
|
key: test_jcc
|
|
value: [0.70588235 0.57142857 0.61111111 0.5 0.65 0.75
|
|
0.35 0.52631579 0.38095238 0.72222222]
|
|
|
|
mean value: 0.5767912428129146
|
|
|
|
key: train_jcc
|
|
value: [0.71165644 0.7375 0.70552147 0.72392638 0.71875 0.73291925
|
|
0.74213836 0.7195122 0.77707006 0.72392638]
|
|
|
|
mean value: 0.7292920553101104
|
|
|
|
MCC on Blind test: 0.47
|
|
|
|
Accuracy on Blind test: 0.74
|
|
|
|
Model_name: Logistic RegressionCV
|
|
Model func: LogisticRegressionCV(random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegressionCV(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.76020503 0.79178238 0.93050313 0.82002664 0.92459559 1.04001236
|
|
0.81016803 0.92450762 0.81985116 0.86520219]
|
|
|
|
mean value: 0.8686854124069214
|
|
|
|
key: score_time
|
|
value: [0.01202726 0.01574039 0.01197004 0.01215267 0.01216245 0.01203179
|
|
0.01205277 0.01200676 0.01235318 0.01223612]
|
|
|
|
mean value: 0.012473344802856445
|
|
|
|
key: test_mcc
|
|
value: [0.6778302 0.43041423 0.4184137 0.29960206 0.54812195 0.68826048
|
|
0.2 0.47087096 0.13363062 0.68041382]
|
|
|
|
mean value: 0.4547558024028332
|
|
|
|
key: train_mcc
|
|
value: [0.59418851 0.63640693 0.59276306 0.65208251 0.61471725 0.62911245
|
|
0.65957616 0.56575238 0.68842387 0.61673597]
|
|
|
|
mean value: 0.6249759091307765
|
|
|
|
key: test_accuracy
|
|
value: [0.83870968 0.70967742 0.70967742 0.64516129 0.77419355 0.83870968
|
|
0.6 0.73333333 0.56666667 0.83333333]
|
|
|
|
mean value: 0.7249462365591398
|
|
|
|
key: train_accuracy
|
|
value: [0.79636364 0.81818182 0.79636364 0.82545455 0.80727273 0.81454545
|
|
0.82971014 0.7826087 0.8442029 0.80797101]
|
|
|
|
mean value: 0.8122674571805006
|
|
|
|
key: test_fscore
|
|
value: [0.82758621 0.72727273 0.68965517 0.62068966 0.78787879 0.85714286
|
|
0.6 0.75 0.55172414 0.84848485]
|
|
|
|
mean value: 0.7260434393193014
|
|
|
|
key: train_fscore
|
|
value: [0.8041958 0.82014388 0.79856115 0.82978723 0.80866426 0.81454545
|
|
0.83154122 0.78723404 0.84476534 0.81272085]
|
|
|
|
mean value: 0.8152159240890843
|
|
|
|
key: test_precision
|
|
value: [0.85714286 0.66666667 0.71428571 0.69230769 0.76470588 0.78947368
|
|
0.6 0.70588235 0.57142857 0.77777778]
|
|
|
|
mean value: 0.7139671199113924
|
|
|
|
key: train_precision
|
|
value: [0.77702703 0.81428571 0.79285714 0.80689655 0.8 0.8115942
|
|
0.82269504 0.77083333 0.84172662 0.79310345]
|
|
|
|
mean value: 0.8031019074567797
|
|
|
|
key: test_recall
|
|
value: [0.8 0.8 0.66666667 0.5625 0.8125 0.9375
|
|
0.6 0.8 0.53333333 0.93333333]
|
|
|
|
mean value: 0.7445833333333334
|
|
|
|
key: train_recall
|
|
value: [0.83333333 0.82608696 0.80434783 0.8540146 0.81751825 0.81751825
|
|
0.84057971 0.80434783 0.84782609 0.83333333]
|
|
|
|
mean value: 0.8278906167354279
|
|
|
|
key: test_roc_auc
|
|
value: [0.8375 0.7125 0.70833333 0.64791667 0.77291667 0.83541667
|
|
0.6 0.73333333 0.56666667 0.83333333]
|
|
|
|
mean value: 0.7247916666666667
|
|
|
|
key: train_roc_auc
|
|
value: [0.79622871 0.81815297 0.7963345 0.82555802 0.80730985 0.81455623
|
|
0.82971014 0.7826087 0.8442029 0.80797101]
|
|
|
|
mean value: 0.8122633026552417
|
|
|
|
key: test_jcc
|
|
value: [0.70588235 0.57142857 0.52631579 0.45 0.65 0.75
|
|
0.42857143 0.6 0.38095238 0.73684211]
|
|
|
|
mean value: 0.5799992628630399
|
|
|
|
key: train_jcc
|
|
value: [0.67251462 0.69512195 0.66467066 0.70909091 0.67878788 0.68711656
|
|
0.71165644 0.64912281 0.73125 0.68452381]
|
|
|
|
mean value: 0.6883855640340298
|
|
|
|
MCC on Blind test: 0.5
|
|
|
|
Accuracy on Blind test: 0.75
|
|
|
|
Model_name: Gaussian NB
|
|
Model func: GaussianNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01418853 0.01279187 0.00965738 0.00956011 0.01020837 0.00937939
|
|
0.00952101 0.01197052 0.00954056 0.00964451]
|
|
|
|
mean value: 0.010646224021911621
|
|
|
|
key: score_time
|
|
value: [0.01258636 0.00933838 0.0092454 0.00916553 0.00925994 0.00869632
|
|
0.00937986 0.00906706 0.00944376 0.00919294]
|
|
|
|
mean value: 0.009537553787231446
|
|
|
|
key: test_mcc
|
|
value: [0.37191715 0.1158132 0.53006813 0.48527095 0.60910959 0.29069387
|
|
0.15075567 0.30151134 0. 0.30151134]
|
|
|
|
mean value: 0.31566512519501916
|
|
|
|
key: train_mcc
|
|
value: [0.40334053 0.41939671 0.37251267 0.40066636 0.401611 0.38495836
|
|
0.42159521 0.40417996 0.412001 0.44343997]
|
|
|
|
mean value: 0.4063701792762806
|
|
|
|
key: test_accuracy
|
|
value: [0.67741935 0.5483871 0.74193548 0.74193548 0.77419355 0.64516129
|
|
0.56666667 0.63333333 0.5 0.63333333]
|
|
|
|
mean value: 0.646236559139785
|
|
|
|
key: train_accuracy
|
|
value: [0.69090909 0.70181818 0.67636364 0.69090909 0.68363636 0.68
|
|
0.69927536 0.69202899 0.68478261 0.72101449]
|
|
|
|
mean value: 0.6920737812911726
|
|
|
|
key: test_fscore
|
|
value: [0.70588235 0.61111111 0.77777778 0.76470588 0.82051282 0.68571429
|
|
0.64864865 0.7027027 0.51612903 0.7027027 ]
|
|
|
|
mean value: 0.6935887316722231
|
|
|
|
key: train_fscore
|
|
value: [0.73520249 0.7388535 0.72274143 0.73015873 0.73556231 0.72670807
|
|
0.74143302 0.73354232 0.74183976 0.73170732]
|
|
|
|
mean value: 0.7337748964382163
|
|
|
|
key: test_precision
|
|
value: [0.63157895 0.52380952 0.66666667 0.72222222 0.69565217 0.63157895
|
|
0.54545455 0.59090909 0.5 0.59090909]
|
|
|
|
mean value: 0.6098781208621026
|
|
|
|
key: train_precision
|
|
value: [0.64480874 0.65909091 0.63387978 0.64606742 0.63020833 0.63243243
|
|
0.65027322 0.64640884 0.6281407 0.70469799]
|
|
|
|
mean value: 0.6476008369094667
|
|
|
|
key: test_recall
|
|
value: [0.8 0.73333333 0.93333333 0.8125 1. 0.75
|
|
0.8 0.86666667 0.53333333 0.86666667]
|
|
|
|
mean value: 0.8095833333333333
|
|
|
|
key: train_recall
|
|
value: [0.85507246 0.84057971 0.84057971 0.83941606 0.88321168 0.8540146
|
|
0.86231884 0.84782609 0.9057971 0.76086957]
|
|
|
|
mean value: 0.8489685814027292
|
|
|
|
key: test_roc_auc
|
|
value: [0.68125 0.55416667 0.74791667 0.73958333 0.76666667 0.64166667
|
|
0.56666667 0.63333333 0.5 0.63333333]
|
|
|
|
mean value: 0.6464583333333334
|
|
|
|
key: train_roc_auc
|
|
value: [0.69030995 0.70131175 0.67576431 0.69144716 0.68435946 0.68063049
|
|
0.69927536 0.69202899 0.68478261 0.72101449]
|
|
|
|
mean value: 0.6920924574209246
|
|
|
|
key: test_jcc
|
|
value: [0.54545455 0.44 0.63636364 0.61904762 0.69565217 0.52173913
|
|
0.48 0.54166667 0.34782609 0.54166667]
|
|
|
|
mean value: 0.5369416525503482
|
|
|
|
key: train_jcc
|
|
value: [0.58128079 0.58585859 0.56585366 0.575 0.58173077 0.57073171
|
|
0.58910891 0.57920792 0.58962264 0.57692308]
|
|
|
|
mean value: 0.5795318059236032
|
|
|
|
MCC on Blind test: 0.4
|
|
|
|
Accuracy on Blind test: 0.71
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01059604 0.0102663 0.01141644 0.01082611 0.01077914 0.01080894
|
|
0.01075387 0.01057267 0.00982332 0.01071548]
|
|
|
|
mean value: 0.010655832290649415
|
|
|
|
key: score_time
|
|
value: [0.00911498 0.00915074 0.00978041 0.00981259 0.00962925 0.00970507
|
|
0.01004362 0.00942516 0.0097158 0.00978398]
|
|
|
|
mean value: 0.00961616039276123
|
|
|
|
key: test_mcc
|
|
value: [0.48527095 0.10687275 0.225 0.54812195 0.61608311 0.61608311
|
|
0.06666667 0.20180184 0.13608276 0.56568542]
|
|
|
|
mean value: 0.3567668567726006
|
|
|
|
key: train_mcc
|
|
value: [0.44830704 0.49105638 0.48388137 0.5145107 0.44859891 0.45491116
|
|
0.49405246 0.47311844 0.50810087 0.45749571]
|
|
|
|
mean value: 0.4774033036828667
|
|
|
|
key: test_accuracy
|
|
value: [0.74193548 0.5483871 0.61290323 0.77419355 0.80645161 0.80645161
|
|
0.53333333 0.6 0.56666667 0.76666667]
|
|
|
|
mean value: 0.6756989247311828
|
|
|
|
key: train_accuracy
|
|
value: [0.72363636 0.74545455 0.74181818 0.75636364 0.72363636 0.72727273
|
|
0.74637681 0.73550725 0.75362319 0.72826087]
|
|
|
|
mean value: 0.7381949934123847
|
|
|
|
key: test_fscore
|
|
value: [0.71428571 0.58823529 0.6 0.78787879 0.82352941 0.82352941
|
|
0.53333333 0.625 0.51851852 0.8 ]
|
|
|
|
mean value: 0.6814310471663413
|
|
|
|
key: train_fscore
|
|
value: [0.73426573 0.75 0.74733096 0.76491228 0.73239437 0.7311828
|
|
0.75524476 0.74740484 0.76056338 0.73684211]
|
|
|
|
mean value: 0.746014122279795
|
|
|
|
key: test_precision
|
|
value: [0.76923077 0.52631579 0.6 0.76470588 0.77777778 0.77777778
|
|
0.53333333 0.58823529 0.58333333 0.7 ]
|
|
|
|
mean value: 0.6620709957397264
|
|
|
|
key: train_precision
|
|
value: [0.70945946 0.73943662 0.73426573 0.73648649 0.70748299 0.71830986
|
|
0.72972973 0.71523179 0.73972603 0.71428571]
|
|
|
|
mean value: 0.7244414411774374
|
|
|
|
key: test_recall
|
|
value: [0.66666667 0.66666667 0.6 0.8125 0.875 0.875
|
|
0.53333333 0.66666667 0.46666667 0.93333333]
|
|
|
|
mean value: 0.7095833333333333
|
|
|
|
key: train_recall
|
|
value: [0.76086957 0.76086957 0.76086957 0.79562044 0.75912409 0.74452555
|
|
0.7826087 0.7826087 0.7826087 0.76086957]
|
|
|
|
mean value: 0.7690574420818788
|
|
|
|
key: test_roc_auc
|
|
value: [0.73958333 0.55208333 0.6125 0.77291667 0.80416667 0.80416667
|
|
0.53333333 0.6 0.56666667 0.76666667]
|
|
|
|
mean value: 0.6752083333333333
|
|
|
|
key: train_roc_auc
|
|
value: [0.72350048 0.74539829 0.74174865 0.75650587 0.72376494 0.72733524
|
|
0.74637681 0.73550725 0.75362319 0.72826087]
|
|
|
|
mean value: 0.7382021580450651
|
|
|
|
key: test_jcc
|
|
value: [0.55555556 0.41666667 0.42857143 0.65 0.7 0.7
|
|
0.36363636 0.45454545 0.35 0.66666667]
|
|
|
|
mean value: 0.5285642135642136
|
|
|
|
key: train_jcc
|
|
value: [0.5801105 0.6 0.59659091 0.61931818 0.57777778 0.57627119
|
|
0.60674157 0.59668508 0.61363636 0.58333333]
|
|
|
|
mean value: 0.5950464905241448
|
|
|
|
MCC on Blind test: 0.38
|
|
|
|
Accuracy on Blind test: 0.69
|
|
|
|
Model_name: K-Nearest Neighbors
|
|
Model func: KNeighborsClassifier()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', KNeighborsClassifier())])
|
|
|
|
key: fit_time
|
|
value: [0.01124001 0.00959182 0.01030231 0.01053524 0.00965929 0.01008248
|
|
0.01049399 0.01036906 0.01040936 0.01085138]
|
|
|
|
mean value: 0.010353493690490722
|
|
|
|
key: score_time
|
|
value: [0.0133214 0.01170659 0.01226521 0.0127995 0.01189852 0.01236296
|
|
0.01204944 0.01194453 0.01191974 0.0138588 ]
|
|
|
|
mean value: 0.012412667274475098
|
|
|
|
key: test_mcc
|
|
value: [ 0.02227177 0.10041929 0.225 -0.10041929 0.36121114 0.22364661
|
|
0.20180184 0.3363364 0.06726728 0.54433105]
|
|
|
|
mean value: 0.19818660850469166
|
|
|
|
key: train_mcc
|
|
value: [0.53716066 0.55636075 0.5932225 0.57107338 0.56423712 0.56391537
|
|
0.5597521 0.50858342 0.5371352 0.52911055]
|
|
|
|
mean value: 0.5520551047595437
|
|
|
|
key: test_accuracy
|
|
value: [0.51612903 0.5483871 0.61290323 0.4516129 0.67741935 0.61290323
|
|
0.6 0.66666667 0.53333333 0.76666667]
|
|
|
|
mean value: 0.5986021505376344
|
|
|
|
key: train_accuracy
|
|
value: [0.76727273 0.77818182 0.79636364 0.78545455 0.78181818 0.78181818
|
|
0.77898551 0.75362319 0.76811594 0.76449275]
|
|
|
|
mean value: 0.7756126482213439
|
|
|
|
key: test_fscore
|
|
value: [0.4 0.5625 0.6 0.48484848 0.72222222 0.64705882
|
|
0.625 0.6875 0.5 0.78787879]
|
|
|
|
mean value: 0.6017008318478907
|
|
|
|
key: train_fscore
|
|
value: [0.77931034 0.77978339 0.80141844 0.78700361 0.78571429 0.78417266
|
|
0.78745645 0.76223776 0.77464789 0.76190476]
|
|
|
|
mean value: 0.7803649593198295
|
|
|
|
key: test_precision
|
|
value: [0.5 0.52941176 0.6 0.47058824 0.65 0.61111111
|
|
0.58823529 0.64705882 0.53846154 0.72222222]
|
|
|
|
mean value: 0.5857088989441931
|
|
|
|
key: train_precision
|
|
value: [0.74342105 0.77697842 0.78472222 0.77857143 0.76923077 0.77304965
|
|
0.75838926 0.73648649 0.75342466 0.77037037]
|
|
|
|
mean value: 0.7644644311448328
|
|
|
|
key: test_recall
|
|
value: [0.33333333 0.6 0.6 0.5 0.8125 0.6875
|
|
0.66666667 0.73333333 0.46666667 0.86666667]
|
|
|
|
mean value: 0.6266666666666667
|
|
|
|
key: train_recall
|
|
value: [0.81884058 0.7826087 0.81884058 0.79562044 0.80291971 0.79562044
|
|
0.81884058 0.78985507 0.79710145 0.75362319]
|
|
|
|
mean value: 0.7973870728869142
|
|
|
|
key: test_roc_auc
|
|
value: [0.51041667 0.55 0.6125 0.45 0.67291667 0.61041667
|
|
0.6 0.66666667 0.53333333 0.76666667]
|
|
|
|
mean value: 0.5972916666666667
|
|
|
|
key: train_roc_auc
|
|
value: [0.76708452 0.77816566 0.7962816 0.78549138 0.78189464 0.78186819
|
|
0.77898551 0.75362319 0.76811594 0.76449275]
|
|
|
|
mean value: 0.7756003385168729
|
|
|
|
key: test_jcc
|
|
value: [0.25 0.39130435 0.42857143 0.32 0.56521739 0.47826087
|
|
0.45454545 0.52380952 0.33333333 0.65 ]
|
|
|
|
mean value: 0.43950423489553925
|
|
|
|
key: train_jcc
|
|
value: [0.63841808 0.63905325 0.66863905 0.64880952 0.64705882 0.64497041
|
|
0.64942529 0.61581921 0.63218391 0.61538462]
|
|
|
|
mean value: 0.6399762168154934
|
|
|
|
MCC on Blind test: 0.21
|
|
|
|
Accuracy on Blind test: 0.6
|
|
|
|
Model_name: SVM
|
|
Model func: SVC(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SVC(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01720405 0.01606512 0.01529908 0.01629615 0.01480889 0.01517487
|
|
0.01426411 0.01418185 0.0146811 0.01435494]
|
|
|
|
mean value: 0.015233016014099121
|
|
|
|
key: score_time
|
|
value: [0.01176977 0.01026464 0.01114035 0.01153159 0.01037598 0.01032972
|
|
0.01027489 0.01048756 0.01041889 0.01035976]
|
|
|
|
mean value: 0.010695314407348633
|
|
|
|
key: test_mcc
|
|
value: [0.6778302 0.48954403 0.48333333 0.54812195 0.48527095 0.61608311
|
|
0.2 0.3363364 0.27216553 0.65465367]
|
|
|
|
mean value: 0.4763339177668095
|
|
|
|
key: train_mcc
|
|
value: [0.63868192 0.67353607 0.64363925 0.67365676 0.64381316 0.68069473
|
|
0.68856852 0.66027027 0.7468672 0.64780833]
|
|
|
|
mean value: 0.66975362023314
|
|
|
|
key: test_accuracy
|
|
value: [0.83870968 0.74193548 0.74193548 0.77419355 0.74193548 0.80645161
|
|
0.6 0.66666667 0.63333333 0.8 ]
|
|
|
|
mean value: 0.734516129032258
|
|
|
|
key: train_accuracy
|
|
value: [0.81818182 0.83636364 0.82181818 0.83636364 0.82181818 0.84
|
|
0.8442029 0.82971014 0.87318841 0.82246377]
|
|
|
|
mean value: 0.8344110671936759
|
|
|
|
key: test_fscore
|
|
value: [0.82758621 0.75 0.73333333 0.78787879 0.76470588 0.82352941
|
|
0.6 0.6875 0.59259259 0.83333333]
|
|
|
|
mean value: 0.7400459548152246
|
|
|
|
key: train_fscore
|
|
value: [0.82638889 0.8409894 0.82310469 0.83985765 0.82310469 0.84285714
|
|
0.84587814 0.83392226 0.87544484 0.83044983]
|
|
|
|
mean value: 0.8381997533098544
|
|
|
|
key: test_precision
|
|
value: [0.85714286 0.70588235 0.73333333 0.76470588 0.72222222 0.77777778
|
|
0.6 0.64705882 0.66666667 0.71428571]
|
|
|
|
mean value: 0.7189075630252101
|
|
|
|
key: train_precision
|
|
value: [0.79333333 0.82068966 0.82014388 0.81944444 0.81428571 0.82517483
|
|
0.83687943 0.8137931 0.86013986 0.79470199]
|
|
|
|
mean value: 0.8198586240270034
|
|
|
|
key: test_recall
|
|
value: [0.8 0.8 0.73333333 0.8125 0.8125 0.875
|
|
0.6 0.73333333 0.53333333 1. ]
|
|
|
|
mean value: 0.77
|
|
|
|
key: train_recall
|
|
value: [0.86231884 0.86231884 0.82608696 0.86131387 0.83211679 0.86131387
|
|
0.85507246 0.85507246 0.89130435 0.86956522]
|
|
|
|
mean value: 0.8576483655982228
|
|
|
|
key: test_roc_auc
|
|
value: [0.8375 0.74375 0.74166667 0.77291667 0.73958333 0.80416667
|
|
0.6 0.66666667 0.63333333 0.8 ]
|
|
|
|
mean value: 0.7339583333333334
|
|
|
|
key: train_roc_auc
|
|
value: [0.81802073 0.83626891 0.8218026 0.83645404 0.8218555 0.84007722
|
|
0.8442029 0.82971014 0.87318841 0.82246377]
|
|
|
|
mean value: 0.8344044218766529
|
|
|
|
key: test_jcc
|
|
value: [0.70588235 0.6 0.57894737 0.65 0.61904762 0.7
|
|
0.42857143 0.52380952 0.42105263 0.71428571]
|
|
|
|
mean value: 0.5941596638655462
|
|
|
|
key: train_jcc
|
|
value: [0.70414201 0.72560976 0.6993865 0.72392638 0.6993865 0.72839506
|
|
0.73291925 0.71515152 0.77848101 0.71005917]
|
|
|
|
mean value: 0.7217457170229105
|
|
|
|
MCC on Blind test: 0.35
|
|
|
|
Accuracy on Blind test: 0.68
|
|
|
|
Model_name: MLP
|
|
Model func: MLPClassifier(max_iter=500, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MLPClassifier(max_iter=500, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.16587377 1.34297132 1.53643155 1.78063393 1.94020653 1.51666331
|
|
1.33432031 1.60784054 1.42286038 1.54603887]
|
|
|
|
mean value: 1.5193840503692626
|
|
|
|
key: score_time
|
|
value: [0.01217413 0.01547241 0.01505756 0.0166719 0.01963019 0.01518726
|
|
0.01228428 0.02341437 0.01221061 0.01512194]
|
|
|
|
mean value: 0.01572246551513672
|
|
|
|
key: test_mcc
|
|
value: [0.48527095 0.58316015 0.54812195 0.43041423 0.48527095 0.68826048
|
|
0.26726124 0.54433105 0. 0.40824829]
|
|
|
|
mean value: 0.4440339288837554
|
|
|
|
key: train_mcc
|
|
value: [0.97841275 0.97131682 0.97131682 0.97841614 0.9713228 0.97841614
|
|
0.92841417 0.97111648 0.97849211 0.97849211]
|
|
|
|
mean value: 0.9705716324042646
|
|
|
|
key: test_accuracy
|
|
value: [0.74193548 0.77419355 0.77419355 0.70967742 0.74193548 0.83870968
|
|
0.63333333 0.76666667 0.5 0.7 ]
|
|
|
|
mean value: 0.7180645161290322
|
|
|
|
key: train_accuracy
|
|
value: [0.98909091 0.98545455 0.98545455 0.98909091 0.98545455 0.98909091
|
|
0.96376812 0.98550725 0.98913043 0.98913043]
|
|
|
|
mean value: 0.9851172595520422
|
|
|
|
key: test_fscore
|
|
value: [0.71428571 0.8 0.75862069 0.68965517 0.76470588 0.85714286
|
|
0.64516129 0.74074074 0.48275862 0.72727273]
|
|
|
|
mean value: 0.7180343694876182
|
|
|
|
key: train_fscore
|
|
value: [0.98924731 0.98571429 0.98571429 0.98916968 0.98561151 0.98916968
|
|
0.96453901 0.98561151 0.98924731 0.98924731]
|
|
|
|
mean value: 0.985327188576788
|
|
|
|
key: test_precision
|
|
value: [0.76923077 0.7 0.78571429 0.76923077 0.72222222 0.78947368
|
|
0.625 0.83333333 0.5 0.66666667]
|
|
|
|
mean value: 0.7160871730608572
|
|
|
|
key: train_precision
|
|
value: [0.9787234 0.97183099 0.97183099 0.97857143 0.97163121 0.97857143
|
|
0.94444444 0.97857143 0.9787234 0.9787234 ]
|
|
|
|
mean value: 0.9731622120429432
|
|
|
|
key: test_recall
|
|
value: [0.66666667 0.93333333 0.73333333 0.625 0.8125 0.9375
|
|
0.66666667 0.66666667 0.46666667 0.8 ]
|
|
|
|
mean value: 0.7308333333333333
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1.
|
|
0.98550725 0.99275362 1. 1. ]
|
|
|
|
mean value: 0.9978260869565218
|
|
|
|
key: test_roc_auc
|
|
value: [0.73958333 0.77916667 0.77291667 0.7125 0.73958333 0.83541667
|
|
0.63333333 0.76666667 0.5 0.7 ]
|
|
|
|
mean value: 0.7179166666666666
|
|
|
|
key: train_roc_auc
|
|
value: [0.98905109 0.98540146 0.98540146 0.98913043 0.98550725 0.98913043
|
|
0.96376812 0.98550725 0.98913043 0.98913043]
|
|
|
|
mean value: 0.9851158362424628
|
|
|
|
key: test_jcc
|
|
value: [0.55555556 0.66666667 0.61111111 0.52631579 0.61904762 0.75
|
|
0.47619048 0.58823529 0.31818182 0.57142857]
|
|
|
|
mean value: 0.568273290177315
|
|
|
|
key: train_jcc
|
|
value: [0.9787234 0.97183099 0.97183099 0.97857143 0.97163121 0.97857143
|
|
0.93150685 0.97163121 0.9787234 0.9787234 ]
|
|
|
|
mean value: 0.9711744302402386
|
|
|
|
MCC on Blind test: 0.45
|
|
|
|
Accuracy on Blind test: 0.73
|
|
|
|
Model_name: Decision Tree
|
|
Model func: DecisionTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', DecisionTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02972984 0.02371287 0.02072167 0.02109385 0.0209024 0.0185678
|
|
0.01990628 0.02344799 0.02155495 0.02220154]
|
|
|
|
mean value: 0.022183918952941896
|
|
|
|
key: score_time
|
|
value: [0.01491976 0.00909758 0.00885677 0.00869632 0.00869989 0.0090518
|
|
0.00870323 0.00861406 0.008672 0.00875735]
|
|
|
|
mean value: 0.00940687656402588
|
|
|
|
key: test_mcc
|
|
value: [0.54812195 0.44824996 0.54812195 0.29069387 0.6125 0.67916667
|
|
0.13608276 0.47087096 0.27216553 0.53452248]
|
|
|
|
mean value: 0.4540496132732843
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.77419355 0.70967742 0.77419355 0.64516129 0.80645161 0.83870968
|
|
0.56666667 0.73333333 0.63333333 0.76666667]
|
|
|
|
mean value: 0.7248387096774194
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.75862069 0.74285714 0.75862069 0.68571429 0.8125 0.83870968
|
|
0.60606061 0.75 0.66666667 0.75862069]
|
|
|
|
mean value: 0.7378370447683573
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.78571429 0.65 0.78571429 0.63157895 0.8125 0.86666667
|
|
0.55555556 0.70588235 0.61111111 0.78571429]
|
|
|
|
mean value: 0.7190437490785788
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.73333333 0.86666667 0.73333333 0.75 0.8125 0.8125
|
|
0.66666667 0.8 0.73333333 0.73333333]
|
|
|
|
mean value: 0.7641666666666667
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.77291667 0.71458333 0.77291667 0.64166667 0.80625 0.83958333
|
|
0.56666667 0.73333333 0.63333333 0.76666667]
|
|
|
|
mean value: 0.7247916666666667
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.61111111 0.59090909 0.61111111 0.52173913 0.68421053 0.72222222
|
|
0.43478261 0.6 0.5 0.61111111]
|
|
|
|
mean value: 0.5887196911910871
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.42
|
|
|
|
Accuracy on Blind test: 0.69
|
|
|
|
Model_name: Extra Trees
|
|
Model func: ExtraTreesClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreesClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.1064136 0.10794759 0.11117125 0.10723972 0.1092217 0.13529468
|
|
0.11058927 0.11555004 0.11684036 0.1151557 ]
|
|
|
|
mean value: 0.11354238986968994
|
|
|
|
key: score_time
|
|
value: [0.01747465 0.01882863 0.01768613 0.01932931 0.01908422 0.01826024
|
|
0.01802802 0.01877666 0.01917076 0.01901317]
|
|
|
|
mean value: 0.01856517791748047
|
|
|
|
key: test_mcc
|
|
value: [0.35416667 0.43041423 0.6125 0.37191715 0.61608311 0.22630095
|
|
0.21821789 0.53452248 0.20751434 0.68041382]
|
|
|
|
mean value: 0.4252050648686833
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.67741935 0.70967742 0.80645161 0.67741935 0.80645161 0.61290323
|
|
0.6 0.76666667 0.6 0.83333333]
|
|
|
|
mean value: 0.7090322580645161
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.66666667 0.72727273 0.8 0.64285714 0.82352941 0.66666667
|
|
0.5 0.77419355 0.53846154 0.84848485]
|
|
|
|
mean value: 0.6988132550561393
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.66666667 0.66666667 0.8 0.75 0.77777778 0.6
|
|
0.66666667 0.75 0.63636364 0.77777778]
|
|
|
|
mean value: 0.7091919191919192
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.66666667 0.8 0.8 0.5625 0.875 0.75
|
|
0.4 0.8 0.46666667 0.93333333]
|
|
|
|
mean value: 0.7054166666666667
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.67708333 0.7125 0.80625 0.68125 0.80416667 0.60833333
|
|
0.6 0.76666667 0.6 0.83333333]
|
|
|
|
mean value: 0.7089583333333334
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.5 0.57142857 0.66666667 0.47368421 0.7 0.5
|
|
0.33333333 0.63157895 0.36842105 0.73684211]
|
|
|
|
mean value: 0.5481954887218045
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.33
|
|
|
|
Accuracy on Blind test: 0.67
|
|
|
|
Model_name: Extra Tree
|
|
Model func: ExtraTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01197481 0.01119828 0.01121902 0.01106596 0.01112223 0.01120782
|
|
0.01562881 0.01700401 0.01172519 0.00989151]
|
|
|
|
mean value: 0.01220376491546631
|
|
|
|
key: score_time
|
|
value: [0.00996518 0.00986505 0.00948811 0.00981331 0.009619 0.00965691
|
|
0.01629329 0.01413226 0.00995064 0.00931406]
|
|
|
|
mean value: 0.010809779167175293
|
|
|
|
key: test_mcc
|
|
value: [-0.03333333 0.225 0.55 0.29960206 0.16035675 0.225
|
|
0.20180184 0.21821789 0.40089186 0.20751434]
|
|
|
|
mean value: 0.2455051405039718
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.48387097 0.61290323 0.77419355 0.64516129 0.58064516 0.61290323
|
|
0.6 0.6 0.7 0.6 ]
|
|
|
|
mean value: 0.6209677419354839
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.46666667 0.6 0.77419355 0.62068966 0.64864865 0.625
|
|
0.57142857 0.5 0.68965517 0.64705882]
|
|
|
|
mean value: 0.6143341086246602
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.46666667 0.6 0.75 0.69230769 0.57142857 0.625
|
|
0.61538462 0.66666667 0.71428571 0.57894737]
|
|
|
|
mean value: 0.6280687295160979
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.46666667 0.6 0.8 0.5625 0.75 0.625
|
|
0.53333333 0.4 0.66666667 0.73333333]
|
|
|
|
mean value: 0.61375
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.48333333 0.6125 0.775 0.64791667 0.575 0.6125
|
|
0.6 0.6 0.7 0.6 ]
|
|
|
|
mean value: 0.620625
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.30434783 0.42857143 0.63157895 0.45 0.48 0.45454545
|
|
0.4 0.33333333 0.52631579 0.47826087]
|
|
|
|
mean value: 0.44869536489444956
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.17
|
|
|
|
Accuracy on Blind test: 0.59
|
|
|
|
Model_name: Random Forest
|
|
Model func: RandomForestClassifier(n_estimators=1000, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(n_estimators=1000, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.63966227 1.56840277 1.54834175 1.46470094 1.47478914 1.46959472
|
|
1.63766098 1.51096797 1.45877457 1.47123885]
|
|
|
|
mean value: 1.5244133949279786
|
|
|
|
key: score_time
|
|
value: [0.09989715 0.11292648 0.08991265 0.09029508 0.09051442 0.0917325
|
|
0.10776639 0.0899961 0.08995199 0.09081578]
|
|
|
|
mean value: 0.09538085460662842
|
|
|
|
key: test_mcc
|
|
value: [0.4184137 0.5612264 0.55 0.29166667 0.87770745 0.68826048
|
|
0.27216553 0.40824829 0.26726124 0.73994007]
|
|
|
|
mean value: 0.5074889832381169
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.70967742 0.77419355 0.77419355 0.64516129 0.93548387 0.83870968
|
|
0.63333333 0.7 0.63333333 0.86666667]
|
|
|
|
mean value: 0.7510752688172043
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.68965517 0.78787879 0.77419355 0.64516129 0.94117647 0.85714286
|
|
0.59259259 0.66666667 0.62068966 0.875 ]
|
|
|
|
mean value: 0.7450157041165023
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.71428571 0.72222222 0.75 0.66666667 0.88888889 0.78947368
|
|
0.66666667 0.75 0.64285714 0.82352941]
|
|
|
|
mean value: 0.7414590397562534
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.66666667 0.86666667 0.8 0.625 1. 0.9375
|
|
0.53333333 0.6 0.6 0.93333333]
|
|
|
|
mean value: 0.75625
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.70833333 0.77708333 0.775 0.64583333 0.93333333 0.83541667
|
|
0.63333333 0.7 0.63333333 0.86666667]
|
|
|
|
mean value: 0.7508333333333334
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.52631579 0.65 0.63157895 0.47619048 0.88888889 0.75
|
|
0.42105263 0.5 0.45 0.77777778]
|
|
|
|
mean value: 0.6071804511278195
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.58
|
|
|
|
Accuracy on Blind test: 0.79
|
|
|
|
Model_name: Random Forest2
|
|
Model func: RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'Z...05', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10,
|
|
oob_score=True, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.94123125 0.97245646 0.97029901 0.94267368 0.90734649 0.89831591
|
|
0.94338274 0.94464684 0.95146275 0.9647851 ]
|
|
|
|
mean value: 0.9436600208282471
|
|
|
|
key: score_time
|
|
value: [0.21182108 0.21188569 0.25536942 0.17796612 0.22923851 0.23547339
|
|
0.26382279 0.22573686 0.20815539 0.23887897]
|
|
|
|
mean value: 0.22583482265472413
|
|
|
|
key: test_mcc
|
|
value: [0.48333333 0.63696156 0.55 0.29166667 0.87770745 0.74689528
|
|
0.40824829 0.53452248 0.27216553 0.73994007]
|
|
|
|
mean value: 0.5541440669610146
|
|
|
|
key: train_mcc
|
|
value: [0.89827013 0.87638191 0.86917422 0.87658045 0.9128273 0.87638845
|
|
0.90582088 0.87683462 0.87683462 0.88415083]
|
|
|
|
mean value: 0.885326339082589
|
|
|
|
key: test_accuracy
|
|
value: [0.74193548 0.80645161 0.77419355 0.64516129 0.93548387 0.87096774
|
|
0.7 0.76666667 0.63333333 0.86666667]
|
|
|
|
mean value: 0.7740860215053763
|
|
|
|
key: train_accuracy
|
|
value: [0.94909091 0.93818182 0.93454545 0.93818182 0.95636364 0.93818182
|
|
0.95289855 0.9384058 0.9384058 0.94202899]
|
|
|
|
mean value: 0.9426284584980237
|
|
|
|
key: test_fscore
|
|
value: [0.73333333 0.82352941 0.77419355 0.64516129 0.94117647 0.88235294
|
|
0.66666667 0.75862069 0.59259259 0.875 ]
|
|
|
|
mean value: 0.7692626944486853
|
|
|
|
key: train_fscore
|
|
value: [0.94964029 0.93862816 0.9352518 0.93862816 0.95652174 0.93818182
|
|
0.95306859 0.93862816 0.93862816 0.94244604]
|
|
|
|
mean value: 0.9429622914245479
|
|
|
|
key: test_precision
|
|
value: [0.73333333 0.73684211 0.75 0.66666667 0.88888889 0.83333333
|
|
0.75 0.78571429 0.66666667 0.82352941]
|
|
|
|
mean value: 0.7634974691631038
|
|
|
|
key: train_precision /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
|
|
value: [0.94285714 0.9352518 0.92857143 0.92857143 0.94964029 0.93478261
|
|
0.94964029 0.9352518 0.9352518 0.93571429]
|
|
|
|
mean value: 0.937553286563296
|
|
|
|
key: test_recall
|
|
value: [0.73333333 0.93333333 0.8 0.625 1. 0.9375
|
|
0.6 0.73333333 0.53333333 0.93333333]
|
|
|
|
mean value: 0.7829166666666667
|
|
|
|
key: train_recall
|
|
value: [0.95652174 0.94202899 0.94202899 0.94890511 0.96350365 0.94160584
|
|
0.95652174 0.94202899 0.94202899 0.94927536]
|
|
|
|
mean value: 0.9484449381148842
|
|
|
|
key: test_roc_auc
|
|
value: [0.74166667 0.81041667 0.775 0.64583333 0.93333333 0.86875
|
|
0.7 0.76666667 0.63333333 0.86666667]
|
|
|
|
mean value: 0.7741666666666667
|
|
|
|
key: train_roc_auc
|
|
value: [0.94906379 0.93816778 0.93451814 0.93822067 0.95638951 0.93819422
|
|
0.95289855 0.9384058 0.9384058 0.94202899]
|
|
|
|
mean value: 0.9426293240241194
|
|
|
|
key: test_jcc
|
|
value: [0.57894737 0.7 0.63157895 0.47619048 0.88888889 0.78947368
|
|
0.5 0.61111111 0.42105263 0.77777778]
|
|
|
|
mean value: 0.6375020885547201
|
|
|
|
key: train_jcc
|
|
value: [0.90410959 0.88435374 0.87837838 0.88435374 0.91666667 0.88356164
|
|
0.91034483 0.88435374 0.88435374 0.89115646]
|
|
|
|
mean value: 0.8921632534079392
|
|
|
|
MCC on Blind test: 0.64
|
|
|
|
Accuracy on Blind test: 0.82
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.02259088 0.00951052 0.0093019 0.00934172 0.00963044 0.0092907
|
|
0.00934792 0.00934172 0.00938034 0.00926995]
|
|
|
|
mean value: 0.010700607299804687
|
|
|
|
key: score_time
|
|
value: [0.01404405 0.00874329 0.00877905 0.00871348 0.00866389 0.00868344
|
|
0.00880432 0.00881791 0.0087018 0.00872469]
|
|
|
|
mean value: 0.009267592430114746
|
|
|
|
key: test_mcc
|
|
value: [0.48527095 0.10687275 0.225 0.54812195 0.61608311 0.61608311
|
|
0.06666667 0.20180184 0.13608276 0.56568542]
|
|
|
|
mean value: 0.3567668567726006
|
|
|
|
key: train_mcc
|
|
value: [0.44830704 0.49105638 0.48388137 0.5145107 0.44859891 0.45491116
|
|
0.49405246 0.47311844 0.50810087 0.45749571]
|
|
|
|
mean value: 0.4774033036828667
|
|
|
|
key: test_accuracy
|
|
value: [0.74193548 0.5483871 0.61290323 0.77419355 0.80645161 0.80645161
|
|
0.53333333 0.6 0.56666667 0.76666667]
|
|
|
|
mean value: 0.6756989247311828
|
|
|
|
key: train_accuracy
|
|
value: [0.72363636 0.74545455 0.74181818 0.75636364 0.72363636 0.72727273
|
|
0.74637681 0.73550725 0.75362319 0.72826087]
|
|
|
|
mean value: 0.7381949934123847
|
|
|
|
key: test_fscore
|
|
value: [0.71428571 0.58823529 0.6 0.78787879 0.82352941 0.82352941
|
|
0.53333333 0.625 0.51851852 0.8 ]
|
|
|
|
mean value: 0.6814310471663413
|
|
|
|
key: train_fscore
|
|
value: [0.73426573 0.75 0.74733096 0.76491228 0.73239437 0.7311828
|
|
0.75524476 0.74740484 0.76056338 0.73684211]
|
|
|
|
mean value: 0.746014122279795
|
|
|
|
key: test_precision
|
|
value: [0.76923077 0.52631579 0.6 0.76470588 0.77777778 0.77777778
|
|
0.53333333 0.58823529 0.58333333 0.7 ]
|
|
|
|
mean value: 0.6620709957397264
|
|
|
|
key: train_precision
|
|
value: [0.70945946 0.73943662 0.73426573 0.73648649 0.70748299 0.71830986
|
|
0.72972973 0.71523179 0.73972603 0.71428571]
|
|
|
|
mean value: 0.7244414411774374
|
|
|
|
key: test_recall
|
|
value: [0.66666667 0.66666667 0.6 0.8125 0.875 0.875
|
|
0.53333333 0.66666667 0.46666667 0.93333333]
|
|
|
|
mean value: 0.7095833333333333
|
|
|
|
key: train_recall
|
|
value: [0.76086957 0.76086957 0.76086957 0.79562044 0.75912409 0.74452555
|
|
0.7826087 0.7826087 0.7826087 0.76086957]
|
|
|
|
mean value: 0.7690574420818788
|
|
|
|
key: test_roc_auc
|
|
value: [0.73958333 0.55208333 0.6125 0.77291667 0.80416667 0.80416667
|
|
0.53333333 0.6 0.56666667 0.76666667]
|
|
|
|
mean value: 0.6752083333333333
|
|
|
|
key: train_roc_auc
|
|
value: [0.72350048 0.74539829 0.74174865 0.75650587 0.72376494 0.72733524
|
|
0.74637681 0.73550725 0.75362319 0.72826087]
|
|
|
|
mean value: 0.7382021580450651
|
|
|
|
key: test_jcc
|
|
value: [0.55555556 0.41666667 0.42857143 0.65 0.7 0.7
|
|
0.36363636 0.45454545 0.35 0.66666667]
|
|
|
|
mean value: 0.5285642135642136
|
|
|
|
key: train_jcc
|
|
value: [0.5801105 0.6 0.59659091 0.61931818 0.57777778 0.57627119
|
|
0.60674157 0.59668508 0.61363636 0.58333333]
|
|
|
|
mean value: 0.5950464905241448
|
|
|
|
MCC on Blind test: 0.38
|
|
|
|
Accuracy on Blind test: 0.69
|
|
|
|
Model_name: XGBoost
|
|
Model func: XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'Z...
|
|
interaction_constraints=None, learning_rate=None,
|
|
max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan,
|
|
monotone_constraints=None, n_estimators=100,
|
|
n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None,
|
|
reg_lambda=None, scale_pos_weight=None,
|
|
subsample=None, tree_method=None,
|
|
use_label_encoder=False,
|
|
validate_parameters=None, verbosity=0))])
|
|
|
|
key: fit_time
|
|
value: [0.10471797 0.07124925 0.07563305 0.07079864 0.08580709 0.07676268
|
|
0.07324243 0.07241774 0.19062924 0.06637692]
|
|
|
|
mean value: 0.08876349925994872
|
|
|
|
key: score_time
|
|
value: [0.01103663 0.01082015 0.01093411 0.01060057 0.01110458 0.01197505
|
|
0.01113701 0.010674 0.01097536 0.01169777]
|
|
|
|
mean value: 0.011095523834228516
|
|
|
|
key: test_mcc
|
|
value: [0.74689528 0.63696156 0.6778302 0.42083333 0.87083333 0.6125
|
|
0.33333333 0.70710678 0.53452248 0.47087096]
|
|
|
|
mean value: 0.6011687265378022
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.87096774 0.80645161 0.83870968 0.70967742 0.93548387 0.80645161
|
|
0.66666667 0.83333333 0.76666667 0.73333333]
|
|
|
|
mean value: 0.7967741935483871
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.85714286 0.82352941 0.82758621 0.70967742 0.9375 0.8125
|
|
0.66666667 0.8 0.75862069 0.71428571]
|
|
|
|
mean value: 0.7907508965766507
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.92307692 0.73684211 0.85714286 0.73333333 0.9375 0.8125
|
|
0.66666667 1. 0.78571429 0.76923077]
|
|
|
|
mean value: 0.8222006940427993
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.8 0.93333333 0.8 0.6875 0.9375 0.8125
|
|
0.66666667 0.66666667 0.73333333 0.66666667]
|
|
|
|
mean value: 0.7704166666666666
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.86875 0.81041667 0.8375 0.71041667 0.93541667 0.80625
|
|
0.66666667 0.83333333 0.76666667 0.73333333]
|
|
|
|
mean value: 0.796875
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.75 0.7 0.70588235 0.55 0.88235294 0.68421053
|
|
0.5 0.66666667 0.61111111 0.55555556]
|
|
|
|
mean value: 0.660577915376677
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.76
|
|
|
|
Accuracy on Blind test: 0.88
|
|
|
|
Model_name: LDA
|
|
Model func: LinearDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LinearDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.04948115 0.0599544 0.0648675 0.05865765 0.06017065 0.06014323
|
|
0.07210636 0.07587576 0.13489628 0.06331754]
|
|
|
|
mean value: 0.06994705200195313
|
|
|
|
key: score_time
|
|
value: [0.02337527 0.02057123 0.02262139 0.03516126 0.02229404 0.02138257
|
|
0.0203135 0.02203608 0.02431822 0.0224123 ]
|
|
|
|
mean value: 0.023448586463928223
|
|
|
|
key: test_mcc
|
|
value: [0.61608311 0.63696156 0.80753845 0.23939495 0.29166667 0.49612132
|
|
0.27216553 0.40089186 0. 0.54433105]
|
|
|
|
mean value: 0.4305154503434674
|
|
|
|
key: train_mcc
|
|
value: [0.774557 0.79775488 0.78259513 0.81835983 0.81090659 0.8255488
|
|
0.84802649 0.81937858 0.85543189 0.81903414]
|
|
|
|
mean value: 0.8151593320263935
|
|
|
|
key: test_accuracy
|
|
value: [0.80645161 0.80645161 0.90322581 0.61290323 0.64516129 0.74193548
|
|
0.63333333 0.7 0.5 0.76666667]
|
|
|
|
mean value: 0.7116129032258064
|
|
|
|
key: train_accuracy
|
|
value: [0.88727273 0.89818182 0.89090909 0.90909091 0.90545455 0.91272727
|
|
0.92391304 0.90942029 0.92753623 0.90942029]
|
|
|
|
mean value: 0.9073926218708828
|
|
|
|
key: test_fscore
|
|
value: [0.78571429 0.82352941 0.89655172 0.57142857 0.64516129 0.77777778
|
|
0.59259259 0.68965517 0.57142857 0.78787879]
|
|
|
|
mean value: 0.7141718185459597
|
|
|
|
key: train_fscore
|
|
value: [0.88808664 0.89552239 0.88888889 0.90774908 0.90510949 0.91304348
|
|
0.92307692 0.90774908 0.92857143 0.91039427]
|
|
|
|
mean value: 0.906819165872271
|
|
|
|
key: test_precision
|
|
value: [0.84615385 0.73684211 0.92857143 0.66666667 0.66666667 0.7
|
|
0.66666667 0.71428571 0.5 0.72222222]
|
|
|
|
mean value: 0.714807531649637
|
|
|
|
key: train_precision
|
|
value: [0.88489209 0.92307692 0.90909091 0.91791045 0.90510949 0.90647482
|
|
0.93333333 0.92481203 0.91549296 0.90070922]
|
|
|
|
mean value: 0.9120902216468098
|
|
|
|
key: test_recall
|
|
value: [0.73333333 0.93333333 0.86666667 0.5 0.625 0.875
|
|
0.53333333 0.66666667 0.66666667 0.86666667]
|
|
|
|
mean value: 0.7266666666666667
|
|
|
|
key: train_recall
|
|
value: [0.89130435 0.86956522 0.86956522 0.89781022 0.90510949 0.91970803
|
|
0.91304348 0.89130435 0.94202899 0.92028986]
|
|
|
|
mean value: 0.9019729186501639
|
|
|
|
key: test_roc_auc
|
|
value: [0.80416667 0.81041667 0.90208333 0.61666667 0.64583333 0.7375
|
|
0.63333333 0.7 0.5 0.76666667]
|
|
|
|
mean value: 0.7116666666666667
|
|
|
|
key: train_roc_auc
|
|
value: [0.88725801 0.89828626 0.89098699 0.90905004 0.9054533 0.91275257
|
|
0.92391304 0.90942029 0.92753623 0.90942029]
|
|
|
|
mean value: 0.9074077012588596
|
|
|
|
key: test_jcc
|
|
value: [0.64705882 0.7 0.8125 0.4 0.47619048 0.63636364
|
|
0.42105263 0.52631579 0.4 0.65 ]
|
|
|
|
mean value: 0.5669481357136156
|
|
|
|
key: train_jcc
|
|
value: [0.7987013 0.81081081 0.8 0.83108108 0.82666667 0.84
|
|
0.85714286 0.83108108 0.86666667 0.83552632]
|
|
|
|
mean value: 0.8297676777939935
|
|
|
|
MCC on Blind test: 0.34
|
|
|
|
Accuracy on Blind test: 0.67
|
|
|
|
Model_name: Multinomial
|
|
Model func: MultinomialNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MultinomialNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01257849 0.01046824 0.01064348 0.01061773 0.01040769 0.01155543
|
|
0.01041126 0.01048541 0.01040077 0.0099988 ]
|
|
|
|
mean value: 0.010756731033325195
|
|
|
|
key: score_time
|
|
value: [0.01009321 0.00994134 0.0097146 0.00904965 0.00978208 0.00949216
|
|
0.00942087 0.00954843 0.00939345 0.0097518 ]
|
|
|
|
mean value: 0.009618759155273438
|
|
|
|
key: test_mcc
|
|
value: [ 0.48954403 0.29166667 0.48333333 0.35416667 0.76594169 0.35416667
|
|
0.26726124 0.34585723 -0.06726728 0.36369648]
|
|
|
|
mean value: 0.36483667318329865
|
|
|
|
key: train_mcc
|
|
value: [0.40551667 0.43620712 0.42847904 0.42985511 0.39834631 0.42207985
|
|
0.41488848 0.42314354 0.45924272 0.44542163]
|
|
|
|
mean value: 0.4263180469296017
|
|
|
|
key: test_accuracy
|
|
value: [0.74193548 0.64516129 0.74193548 0.67741935 0.87096774 0.67741935
|
|
0.63333333 0.66666667 0.46666667 0.66666667]
|
|
|
|
mean value: 0.6788172043010753
|
|
|
|
key: train_accuracy
|
|
value: [0.70181818 0.71636364 0.71272727 0.71272727 0.69818182 0.70909091
|
|
0.70652174 0.71014493 0.72826087 0.72101449]
|
|
|
|
mean value: 0.7116851119894598
|
|
|
|
key: test_fscore
|
|
value: [0.75 0.64516129 0.73333333 0.6875 0.88888889 0.6875
|
|
0.64516129 0.70588235 0.42857143 0.72222222]
|
|
|
|
mean value: 0.689422080660221
|
|
|
|
key: train_fscore
|
|
value: [0.71724138 0.73469388 0.73037543 0.73037543 0.71080139 0.7260274
|
|
0.71972318 0.7260274 0.74226804 0.73720137]
|
|
|
|
mean value: 0.7274734888168287
|
|
|
|
key: test_precision
|
|
value: [0.70588235 0.625 0.73333333 0.6875 0.8 0.6875
|
|
0.625 0.63157895 0.46153846 0.61904762]
|
|
|
|
mean value: 0.6576380714229011
|
|
|
|
key: train_precision
|
|
value: [0.68421053 0.69230769 0.69032258 0.68589744 0.68 0.68387097
|
|
0.68874172 0.68831169 0.70588235 0.69677419]
|
|
|
|
mean value: 0.6896319159563571
|
|
|
|
key: test_recall
|
|
value: [0.8 0.66666667 0.73333333 0.6875 1. 0.6875
|
|
0.66666667 0.8 0.4 0.86666667]
|
|
|
|
mean value: 0.7308333333333333
|
|
|
|
key: train_recall
|
|
value: [0.75362319 0.7826087 0.77536232 0.7810219 0.74452555 0.77372263
|
|
0.75362319 0.76811594 0.7826087 0.7826087 ]
|
|
|
|
mean value: 0.7697820797630383
|
|
|
|
key: test_roc_auc
|
|
value: [0.74375 0.64583333 0.74166667 0.67708333 0.86666667 0.67708333
|
|
0.63333333 0.66666667 0.46666667 0.66666667]
|
|
|
|
mean value: 0.6785416666666667
|
|
|
|
key: train_roc_auc
|
|
value: [0.70162911 0.71612187 0.71249868 0.71297472 0.69834973 0.70932508
|
|
0.70652174 0.71014493 0.72826087 0.72101449]
|
|
|
|
mean value: 0.7116841214429281
|
|
|
|
key: test_jcc
|
|
value: [0.6 0.47619048 0.57894737 0.52380952 0.8 0.52380952
|
|
0.47619048 0.54545455 0.27272727 0.56521739]
|
|
|
|
mean value: 0.5362346577907219
|
|
|
|
key: train_jcc
|
|
value: [0.55913978 0.58064516 0.57526882 0.57526882 0.55135135 0.56989247
|
|
0.56216216 0.56989247 0.59016393 0.58378378]
|
|
|
|
mean value: 0.5717568758605247
|
|
|
|
MCC on Blind test: 0.4
|
|
|
|
Accuracy on Blind test: 0.71
|
|
|
|
Model_name: Passive Aggresive
|
|
Model func: PassiveAggressiveClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
PassiveAggressiveClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01236296 0.01704454 0.01993775 0.01641083 0.01736665 0.01681542
|
|
0.01517296 0.01890635 0.01642418 0.01728415]
|
|
|
|
mean value: 0.01677258014678955
|
|
|
|
key: score_time
|
|
value: [0.00945234 0.01159 0.01239657 0.01217175 0.01234746 0.01236224
|
|
0.0121429 0.01241302 0.01228023 0.01225352]
|
|
|
|
mean value: 0.011941003799438476
|
|
|
|
key: test_mcc
|
|
value: [0.67916667 0.57104024 0.80753845 0.4365267 0.39198315 0.27740787
|
|
0. 0.53452248 0.06726728 0.55167728]
|
|
|
|
mean value: 0.4317130127721892
|
|
|
|
key: train_mcc
|
|
value: [0.66553822 0.64761904 0.61946219 0.63673168 0.64996343 0.60042101
|
|
0.22232782 0.70466426 0.72059578 0.58516212]
|
|
|
|
mean value: 0.6052485544346587
|
|
|
|
key: test_accuracy
|
|
value: [0.83870968 0.74193548 0.90322581 0.70967742 0.67741935 0.61290323
|
|
0.5 0.76666667 0.53333333 0.73333333]
|
|
|
|
mean value: 0.7017204301075268
|
|
|
|
key: train_accuracy
|
|
value: [0.83272727 0.80363636 0.80727273 0.79636364 0.80727273 0.77090909
|
|
0.54710145 0.84782609 0.85869565 0.76449275]
|
|
|
|
mean value: 0.7836297760210804
|
|
|
|
key: test_fscore
|
|
value: [0.83870968 0.78947368 0.89655172 0.75675676 0.61538462 0.71428571
|
|
0.11764706 0.77419355 0.5 0.78947368]
|
|
|
|
mean value: 0.6792476463616052
|
|
|
|
key: train_fscore
|
|
value: [0.83211679 0.83333333 0.7953668 0.82716049 0.76855895 0.81081081
|
|
0.17218543 0.8590604 0.85171103 0.80597015]
|
|
|
|
mean value: 0.7556274182642174
|
|
|
|
key: test_precision
|
|
value: [0.8125 0.65217391 0.92857143 0.66666667 0.8 0.57692308
|
|
0.5 0.75 0.53846154 0.65217391]
|
|
|
|
mean value: 0.6877470536709667
|
|
|
|
key: train_precision
|
|
value: [0.83823529 0.72580645 0.85123967 0.71657754 0.95652174 0.68877551
|
|
1. 0.8 0.896 0.68527919]
|
|
|
|
mean value: 0.8158435392410766
|
|
|
|
key: test_recall
|
|
value: [0.86666667 1. 0.86666667 0.875 0.5 0.9375
|
|
0.06666667 0.8 0.46666667 1. ]
|
|
|
|
mean value: 0.7379166666666667
|
|
|
|
key: train_recall
|
|
value: [0.82608696 0.97826087 0.74637681 0.97810219 0.64233577 0.98540146
|
|
0.0942029 0.92753623 0.8115942 0.97826087]
|
|
|
|
mean value: 0.7968158256638105
|
|
|
|
key: test_roc_auc
|
|
value: [0.83958333 0.75 0.90208333 0.70416667 0.68333333 0.60208333
|
|
0.5 0.76666667 0.53333333 0.73333333]
|
|
|
|
mean value: 0.7014583333333333
|
|
|
|
key: train_roc_auc
|
|
value: [0.83275151 0.80299905 0.80749498 0.79702211 0.80667513 0.77168624
|
|
0.54710145 0.84782609 0.85869565 0.76449275]
|
|
|
|
mean value: 0.7836744948693537
|
|
|
|
key: test_jcc
|
|
value: [0.72222222 0.65217391 0.8125 0.60869565 0.44444444 0.55555556
|
|
0.0625 0.63157895 0.33333333 0.65217391]
|
|
|
|
mean value: 0.5475177981184847
|
|
|
|
key: train_jcc
|
|
value: [0.7125 0.71428571 0.66025641 0.70526316 0.62411348 0.68181818
|
|
0.0942029 0.75294118 0.74172185 0.675 ]
|
|
|
|
mean value: 0.6362102868758297
|
|
|
|
MCC on Blind test: 0.47
|
|
|
|
Accuracy on Blind test: 0.72
|
|
|
|
Model_name: Stochastic GDescent
|
|
Model func: SGDClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SGDClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01784468 0.01860023 0.01662111 0.01766586 0.01797056 0.01809335
|
|
0.01779962 0.01781583 0.01724243 0.01705742]
|
|
|
|
mean value: 0.01767110824584961
|
|
|
|
key: score_time
|
|
value: [0.01264238 0.01235294 0.01229596 0.01217198 0.01212764 0.01237345
|
|
0.01226807 0.01245046 0.01237631 0.0122304 ]
|
|
|
|
mean value: 0.01232895851135254
|
|
|
|
key: test_mcc
|
|
value: [0.51837044 0.43041423 0.6681531 0.225 0.5612264 0.50443936
|
|
0.27216553 0.4472136 0.13608276 0.53452248]
|
|
|
|
mean value: 0.4297587900694683
|
|
|
|
key: train_mcc
|
|
value: [0.65869918 0.72077077 0.53573576 0.80364983 0.62019336 0.69271973
|
|
0.68448145 0.45303441 0.70646128 0.71201679]
|
|
|
|
mean value: 0.6587762562073672
|
|
|
|
key: test_accuracy
|
|
value: [0.74193548 0.70967742 0.80645161 0.61290323 0.77419355 0.74193548
|
|
0.63333333 0.66666667 0.56666667 0.76666667]
|
|
|
|
mean value: 0.7020430107526882
|
|
|
|
key: train_accuracy
|
|
value: [0.82181818 0.85818182 0.73090909 0.90181818 0.78909091 0.83636364
|
|
0.82971014 0.67028986 0.8442029 0.85507246]
|
|
|
|
mean value: 0.8137457180500659
|
|
|
|
key: test_fscore
|
|
value: [0.66666667 0.72727273 0.83333333 0.625 0.75862069 0.71428571
|
|
0.66666667 0.75 0.51851852 0.75862069]
|
|
|
|
mean value: 0.7018985006053972
|
|
|
|
key: train_fscore
|
|
value: [0.80161943 0.85057471 0.78612717 0.9010989 0.74107143 0.81327801
|
|
0.84984026 0.7520436 0.8244898 0.84962406]
|
|
|
|
mean value: 0.8169767359831244
|
|
|
|
key: test_precision
|
|
value: [0.88888889 0.66666667 0.71428571 0.625 0.84615385 0.83333333
|
|
0.61111111 0.6 0.58333333 0.78571429]
|
|
|
|
mean value: 0.7154487179487179
|
|
|
|
key: train_precision
|
|
value: [0.90825688 0.90243902 0.65384615 0.90441176 0.95402299 0.94230769
|
|
0.76 0.60262009 0.94392523 0.8828125 ]
|
|
|
|
mean value: 0.8454642325470769
|
|
|
|
key: test_recall
|
|
value: [0.53333333 0.8 1. 0.625 0.6875 0.625
|
|
0.73333333 1. 0.46666667 0.73333333]
|
|
|
|
mean value: 0.7204166666666667
|
|
|
|
key: train_recall
|
|
value: [0.7173913 0.80434783 0.98550725 0.89781022 0.60583942 0.71532847
|
|
0.96376812 1. 0.73188406 0.81884058]
|
|
|
|
mean value: 0.8240717232624564
|
|
|
|
key: test_roc_auc
|
|
value: [0.73541667 0.7125 0.8125 0.6125 0.77708333 0.74583333
|
|
0.63333333 0.66666667 0.56666667 0.76666667]
|
|
|
|
mean value: 0.7029166666666667
|
|
|
|
key: train_roc_auc
|
|
value: [0.8221993 0.85837829 0.7299799 0.90180366 0.78842695 0.8359251
|
|
0.82971014 0.67028986 0.8442029 0.85507246]
|
|
|
|
mean value: 0.8135988575055538
|
|
|
|
key: test_jcc
|
|
value: [0.5 0.57142857 0.71428571 0.45454545 0.61111111 0.55555556
|
|
0.5 0.6 0.35 0.61111111]
|
|
|
|
mean value: 0.5468037518037518
|
|
|
|
key: train_jcc
|
|
value: [0.66891892 0.74 0.64761905 0.82 0.58865248 0.68531469
|
|
0.73888889 0.60262009 0.70138889 0.73856209]
|
|
|
|
mean value: 0.6931965090739446
|
|
|
|
MCC on Blind test: 0.43
|
|
|
|
Accuracy on Blind test: 0.72
|
|
|
|
Model_name: AdaBoost Classifier
|
|
Model func: AdaBoostClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', AdaBoostClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.14925075 0.13835835 0.14027524 0.14160872 0.13706279 0.13644886
|
|
0.13941336 0.13494396 0.14018297 0.1390686 ]
|
|
|
|
mean value: 0.1396613597869873
|
|
|
|
key: score_time
|
|
value: [0.01658058 0.01638126 0.01686168 0.01675653 0.01621389 0.01587439
|
|
0.01622796 0.01635647 0.01603079 0.01685405]
|
|
|
|
mean value: 0.0164137601852417
|
|
|
|
key: test_mcc
|
|
value: [0.48527095 0.63696156 0.6310315 0.35983579 0.69203857 0.48333333
|
|
0.26726124 0.80178373 0. 0.33333333]
|
|
|
|
mean value: 0.46908499974083584
|
|
|
|
key: train_mcc
|
|
value: [0.9710099 0.96386248 0.9710099 0.9713228 0.96366041 0.94911399
|
|
0.99277969 0.97142265 0.97828655 0.96379342]
|
|
|
|
mean value: 0.969626181132409
|
|
|
|
key: test_accuracy
|
|
value: [0.74193548 0.80645161 0.80645161 0.67741935 0.83870968 0.74193548
|
|
0.63333333 0.9 0.5 0.66666667]
|
|
|
|
mean value: 0.7312903225806452
|
|
|
|
key: train_accuracy
|
|
value: [0.98545455 0.98181818 0.98545455 0.98545455 0.98181818 0.97454545
|
|
0.99637681 0.98550725 0.98913043 0.98188406]
|
|
|
|
mean value: 0.9847444005270092
|
|
|
|
key: test_fscore
|
|
value: [0.71428571 0.82352941 0.76923077 0.66666667 0.82758621 0.75
|
|
0.62068966 0.89655172 0.54545455 0.66666667]
|
|
|
|
mean value: 0.7280661360275964
|
|
|
|
key: train_fscore
|
|
value: [0.98561151 0.98207885 0.98561151 0.98561151 0.98168498 0.97435897
|
|
0.99638989 0.98571429 0.98916968 0.98194946]
|
|
|
|
mean value: 0.9848180652449695
|
|
|
|
key: test_precision
|
|
value: [0.76923077 0.73684211 0.90909091 0.71428571 0.92307692 0.75
|
|
0.64285714 0.92857143 0.5 0.66666667]
|
|
|
|
mean value: 0.7540621659042712
|
|
|
|
key: train_precision
|
|
value: [0.97857143 0.97163121 0.97857143 0.97163121 0.98529412 0.97794118
|
|
0.99280576 0.97183099 0.98561151 0.97841727]
|
|
|
|
mean value: 0.9792306080897616
|
|
|
|
key: test_recall
|
|
value: [0.66666667 0.93333333 0.66666667 0.625 0.75 0.75
|
|
0.6 0.86666667 0.6 0.66666667]
|
|
|
|
mean value: 0.7125
|
|
|
|
key: train_recall
|
|
value: [0.99275362 0.99275362 0.99275362 1. 0.97810219 0.97080292
|
|
1. 1. 0.99275362 0.98550725]
|
|
|
|
mean value: 0.9905426848619486
|
|
|
|
key: test_roc_auc
|
|
value: [0.73958333 0.81041667 0.80208333 0.67916667 0.84166667 0.74166667
|
|
0.63333333 0.9 0.5 0.66666667]
|
|
|
|
mean value: 0.7314583333333333
|
|
|
|
key: train_roc_auc
|
|
value: [0.98542791 0.98177827 0.98542791 0.98550725 0.98180472 0.97453189
|
|
0.99637681 0.98550725 0.98913043 0.98188406]
|
|
|
|
mean value: 0.9847376494234634
|
|
|
|
key: test_jcc
|
|
value: [0.55555556 0.7 0.625 0.5 0.70588235 0.6
|
|
0.45 0.8125 0.375 0.5 ]
|
|
|
|
mean value: 0.5823937908496732
|
|
|
|
key: train_jcc
|
|
value: [0.97163121 0.96478873 0.97163121 0.97163121 0.96402878 0.95
|
|
0.99280576 0.97183099 0.97857143 0.96453901]
|
|
|
|
mean value: 0.9701458303368864
|
|
|
|
MCC on Blind test: 0.67
|
|
|
|
Accuracy on Blind test: 0.84
|
|
|
|
Model_name: Bagging Classifier
|
|
Model func: BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
BaggingClassifier(n_jobs=10, oob_score=True,
|
|
random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.06289816 0.06136084 0.07522416 0.06434226 0.06199098 0.06934357
|
|
0.07377386 0.05908513 0.06012392 0.05691075]
|
|
|
|
mean value: 0.06450536251068115
|
|
|
|
key: score_time
|
|
value: [0.02191448 0.03698492 0.02953982 0.02953553 0.03258038 0.01802278
|
|
0.02309513 0.02666569 0.02374339 0.02491879]
|
|
|
|
mean value: 0.026700091361999512
|
|
|
|
key: test_mcc
|
|
value: [0.68826048 0.74896053 0.68826048 0.35983579 0.67916667 0.6778302
|
|
0.33333333 0.60540551 0.47087096 0.40089186]
|
|
|
|
mean value: 0.5652815806670454
|
|
|
|
key: train_mcc
|
|
value: [0.95033154 0.9713228 0.95646654 0.9142571 0.92883052 0.94340394
|
|
0.97828655 0.97142265 0.9436159 0.95652174]
|
|
|
|
mean value: 0.9514459283669261
|
|
|
|
key: test_accuracy
|
|
value: [0.83870968 0.87096774 0.83870968 0.67741935 0.83870968 0.83870968
|
|
0.66666667 0.8 0.73333333 0.7 ]
|
|
|
|
mean value: 0.7803225806451612
|
|
|
|
key: train_accuracy
|
|
value: [0.97454545 0.98545455 0.97818182 0.95636364 0.96363636 0.97090909
|
|
0.98913043 0.98550725 0.97101449 0.97826087]
|
|
|
|
mean value: 0.975300395256917
|
|
|
|
key: test_fscore
|
|
value: [0.81481481 0.875 0.81481481 0.66666667 0.83870968 0.84848485
|
|
0.66666667 0.78571429 0.71428571 0.68965517]
|
|
|
|
mean value: 0.7714812661280959
|
|
|
|
key: train_fscore
|
|
value: [0.9739777 0.98529412 0.97810219 0.95488722 0.96240602 0.96992481
|
|
0.98916968 0.98529412 0.97014925 0.97826087]
|
|
|
|
mean value: 0.9747465963742021
|
|
|
|
key: test_precision
|
|
value: [0.91666667 0.82352941 0.91666667 0.71428571 0.86666667 0.82352941
|
|
0.66666667 0.84615385 0.76923077 0.71428571]
|
|
|
|
mean value: 0.8057681534152122
|
|
|
|
key: train_precision
|
|
value: [1. 1. 0.98529412 0.98449612 0.99224806 1.
|
|
0.98561151 1. 1. 0.97826087]
|
|
|
|
mean value: 0.9925910684050154
|
|
|
|
key: test_recall
|
|
value: [0.73333333 0.93333333 0.73333333 0.625 0.8125 0.875
|
|
0.66666667 0.73333333 0.66666667 0.66666667]
|
|
|
|
mean value: 0.7445833333333333
|
|
|
|
key: train_recall
|
|
value: [0.94927536 0.97101449 0.97101449 0.9270073 0.93430657 0.94160584
|
|
0.99275362 0.97101449 0.94202899 0.97826087]
|
|
|
|
mean value: 0.9578282026869777
|
|
|
|
key: test_roc_auc
|
|
value: [0.83541667 0.87291667 0.83541667 0.67916667 0.83958333 0.8375
|
|
0.66666667 0.8 0.73333333 0.7 ]
|
|
|
|
mean value: 0.78
|
|
|
|
key: train_roc_auc
|
|
value: [0.97463768 0.98550725 0.97820798 0.95625727 0.9635301 0.97080292
|
|
0.98913043 0.98550725 0.97101449 0.97826087]
|
|
|
|
mean value: 0.9752856236115519
|
|
|
|
key: test_jcc
|
|
value: [0.6875 0.77777778 0.6875 0.5 0.72222222 0.73684211
|
|
0.5 0.64705882 0.55555556 0.52631579]
|
|
|
|
mean value: 0.634077227382181
|
|
|
|
key: train_jcc
|
|
value: [0.94927536 0.97101449 0.95714286 0.91366906 0.92753623 0.94160584
|
|
0.97857143 0.97101449 0.94202899 0.95744681]
|
|
|
|
mean value: 0.9509305563606575
|
|
|
|
MCC on Blind test: 0.59
|
|
|
|
Accuracy on Blind test: 0.79
|
|
|
|
Model_name: Gaussian Process
|
|
Model func: GaussianProcessClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianProcessClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.06544423 0.11321926 0.11602688 0.09596086 0.07725477 0.07497787
|
|
0.07734275 0.07283497 0.0715425 0.07421112]
|
|
|
|
mean value: 0.08388152122497558
|
|
|
|
key: score_time
|
|
value: [0.02289581 0.04298139 0.02817822 0.02215266 0.02300477 0.02361321
|
|
0.0250411 0.0257802 0.02668738 0.02769637]
|
|
|
|
mean value: 0.026803112030029295
|
|
|
|
key: test_mcc
|
|
value: [0.02928896 0.31333978 0.35416667 0.22364661 0.36121114 0.35445878
|
|
0.13608276 0.53452248 0.28284271 0.42426407]
|
|
|
|
mean value: 0.30138239600110567
|
|
|
|
key: train_mcc
|
|
value: [0.97820682 0.97090871 0.97820798 0.97820682 0.97820682 0.97841275
|
|
0.97828655 0.96379342 0.98550725 0.97111648]
|
|
|
|
mean value: 0.9760853611689138
|
|
|
|
key: test_accuracy
|
|
value: [0.51612903 0.64516129 0.67741935 0.61290323 0.67741935 0.67741935
|
|
0.56666667 0.76666667 0.63333333 0.7 ]
|
|
|
|
mean value: 0.6473118279569893
|
|
|
|
key: train_accuracy
|
|
value: [0.98909091 0.98545455 0.98909091 0.98909091 0.98909091 0.98909091
|
|
0.98913043 0.98188406 0.99275362 0.98550725]
|
|
|
|
mean value: 0.9880184453227931
|
|
|
|
key: test_fscore
|
|
value: [0.48275862 0.68571429 0.66666667 0.64705882 0.72222222 0.70588235
|
|
0.51851852 0.77419355 0.56 0.74285714]
|
|
|
|
mean value: 0.6505872181526177
|
|
|
|
key: train_fscore
|
|
value: [0.98916968 0.98550725 0.98909091 0.98901099 0.98901099 0.98892989
|
|
0.98909091 0.98181818 0.99275362 0.98540146]
|
|
|
|
mean value: 0.9879783871830355
|
|
|
|
key: test_precision
|
|
value: [0.5 0.6 0.66666667 0.61111111 0.65 0.66666667
|
|
0.58333333 0.75 0.7 0.65 ]
|
|
|
|
mean value: 0.6377777777777778
|
|
|
|
key: train_precision
|
|
value: [0.98561151 0.98550725 0.99270073 0.99264706 0.99264706 1.
|
|
0.99270073 0.98540146 0.99275362 0.99264706]
|
|
|
|
mean value: 0.9912616476535202
|
|
|
|
key: test_recall
|
|
value: [0.46666667 0.8 0.66666667 0.6875 0.8125 0.75
|
|
0.46666667 0.8 0.46666667 0.86666667]
|
|
|
|
mean value: 0.6783333333333333
|
|
|
|
key: train_recall
|
|
value: [0.99275362 0.98550725 0.98550725 0.98540146 0.98540146 0.97810219
|
|
0.98550725 0.97826087 0.99275362 0.97826087]
|
|
|
|
mean value: 0.9847455834126733
|
|
|
|
key: test_roc_auc
|
|
value: [0.51458333 0.65 0.67708333 0.61041667 0.67291667 0.675
|
|
0.56666667 0.76666667 0.63333333 0.7 ]
|
|
|
|
mean value: 0.6466666666666667
|
|
|
|
key: train_roc_auc
|
|
value: [0.98907754 0.98545435 0.98910399 0.98907754 0.98907754 0.98905109
|
|
0.98913043 0.98188406 0.99275362 0.98550725]
|
|
|
|
mean value: 0.9880117423040304
|
|
|
|
key: test_jcc
|
|
value: [0.31818182 0.52173913 0.5 0.47826087 0.56521739 0.54545455
|
|
0.35 0.63157895 0.38888889 0.59090909]
|
|
|
|
mean value: 0.48902306821071123
|
|
|
|
key: train_jcc
|
|
value: [0.97857143 0.97142857 0.97841727 0.97826087 0.97826087 0.97810219
|
|
0.97841727 0.96428571 0.98561151 0.97122302]
|
|
|
|
mean value: 0.9762578707945373
|
|
|
|
MCC on Blind test: 0.26
|
|
|
|
Accuracy on Blind test: 0.64
|
|
|
|
Model_name: Gradient Boosting
|
|
Model func: GradientBoostingClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GradientBoostingClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.49068594 0.47535682 0.47198796 0.47094846 0.47834969 0.4801023
|
|
0.48822117 0.49525213 0.53687286 0.55150414]
|
|
|
|
mean value: 0.4939281463623047
|
|
|
|
key: score_time
|
|
value: [0.00965905 0.00966525 0.00965071 0.01117015 0.00985289 0.00947905
|
|
0.00972843 0.01040936 0.00936937 0.0097878 ]
|
|
|
|
mean value: 0.009877204895019531
|
|
|
|
key: test_mcc
|
|
value: [0.74689528 0.74896053 0.6778302 0.48333333 0.87083333 0.80833333
|
|
0.40089186 0.80178373 0.40089186 0.40089186]
|
|
|
|
mean value: 0.6340645329495894
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.87096774 0.87096774 0.83870968 0.74193548 0.93548387 0.90322581
|
|
0.7 0.9 0.7 0.7 ]
|
|
|
|
mean value: 0.8161290322580645
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.85714286 0.875 0.82758621 0.75 0.9375 0.90322581
|
|
0.70967742 0.90322581 0.70967742 0.68965517]
|
|
|
|
mean value: 0.8162690688066105
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.92307692 0.82352941 0.85714286 0.75 0.9375 0.93333333
|
|
0.6875 0.875 0.6875 0.71428571]
|
|
|
|
mean value: 0.8188868239603534
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.8 0.93333333 0.8 0.75 0.9375 0.875
|
|
0.73333333 0.93333333 0.73333333 0.66666667]
|
|
|
|
mean value: 0.81625
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.86875 0.87291667 0.8375 0.74166667 0.93541667 0.90416667
|
|
0.7 0.9 0.7 0.7 ]
|
|
|
|
mean value: 0.8160416666666667
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.75 0.77777778 0.70588235 0.6 0.88235294 0.82352941
|
|
0.55 0.82352941 0.55 0.52631579]
|
|
|
|
mean value: 0.698938768489852
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.72
|
|
|
|
Accuracy on Blind test: 0.86
|
|
|
|
Model_name: QDA
|
|
Model func: QuadraticDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', QuadraticDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.02257228 0.02407503 0.03378057 0.02414417 0.02430534 0.05023384
|
|
0.02587032 0.03249168 0.02542448 0.02553535]
|
|
|
|
mean value: 0.02884330749511719
|
|
|
|
key: score_time
|
|
value: [0.01887488 0.01229167 0.01929283 0.01229572 0.01235938 0.01431823
|
|
0.02170515 0.01496339 0.01624966 0.01476908]
|
|
|
|
mean value: 0.01571199893951416
|
|
|
|
key: test_mcc
|
|
value: [0.09283444 0.23939495 0.22364661 0.1784296 0.35445878 0.22630095
|
|
0.20180184 0.26726124 0.13363062 0.06726728]
|
|
|
|
mean value: 0.19850263100769575
|
|
|
|
key: train_mcc
|
|
value: [0.86454883 0.98555786 0.89621758 0.9713228 0.95728155 0.88318781
|
|
0.85146932 0.89661673 0.74787836 0.85781645]
|
|
|
|
mean value: 0.8911897281281569
|
|
|
|
key: test_accuracy
|
|
value: [0.5483871 0.61290323 0.61290323 0.58064516 0.67741935 0.61290323
|
|
0.6 0.63333333 0.56666667 0.53333333]
|
|
|
|
mean value: 0.5978494623655914
|
|
|
|
key: train_accuracy
|
|
value: [0.93090909 0.99272727 0.94545455 0.98545455 0.97818182 0.93818182
|
|
0.92028986 0.94565217 0.85869565 0.92391304]
|
|
|
|
mean value: 0.9419459815546772
|
|
|
|
key: test_fscore
|
|
value: [0.5 0.64705882 0.57142857 0.51851852 0.70588235 0.66666667
|
|
0.57142857 0.64516129 0.58064516 0.5625 ]
|
|
|
|
mean value: 0.596928995612582
|
|
|
|
key: train_fscore
|
|
value: [0.93379791 0.99280576 0.94845361 0.98561151 0.97857143 0.94158076
|
|
0.9261745 0.94845361 0.87619048 0.92929293]
|
|
|
|
mean value: 0.9460932478802436
|
|
|
|
key: test_precision
|
|
value: [0.53846154 0.57894737 0.61538462 0.63636364 0.66666667 0.6
|
|
0.61538462 0.625 0.5625 0.52941176]
|
|
|
|
mean value: 0.5968120205388008
|
|
|
|
key: train_precision
|
|
value: [0.89932886 0.98571429 0.90196078 0.97163121 0.95804196 0.88961039
|
|
0.8625 0.90196078 0.77966102 0.86792453]
|
|
|
|
mean value: 0.9018333811979286
|
|
|
|
key: test_recall
|
|
value: [0.46666667 0.73333333 0.53333333 0.4375 0.75 0.75
|
|
0.53333333 0.66666667 0.6 0.6 ]
|
|
|
|
mean value: 0.6070833333333333
|
|
|
|
key: train_recall
|
|
value: [0.97101449 1. 1. 1. 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9971014492753623
|
|
|
|
key: test_roc_auc
|
|
value: [0.54583333 0.61666667 0.61041667 0.58541667 0.675 0.60833333
|
|
0.6 0.63333333 0.56666667 0.53333333]
|
|
|
|
mean value: 0.5975
|
|
|
|
key: train_roc_auc
|
|
value: [0.93076272 0.99270073 0.94525547 0.98550725 0.97826087 0.9384058
|
|
0.92028986 0.94565217 0.85869565 0.92391304]
|
|
|
|
mean value: 0.9419443562890087
|
|
|
|
key: test_jcc
|
|
value: [0.33333333 0.47826087 0.4 0.35 0.54545455 0.5
|
|
0.4 0.47619048 0.40909091 0.39130435]
|
|
|
|
mean value: 0.42836344814605687
|
|
|
|
key: train_jcc
|
|
value: [0.87581699 0.98571429 0.90196078 0.97163121 0.95804196 0.88961039
|
|
0.8625 0.90196078 0.77966102 0.86792453]
|
|
|
|
mean value: 0.8994821946382935
|
|
|
|
MCC on Blind test: 0.23
|
|
|
|
Accuracy on Blind test: 0.62
|
|
|
|
Model_name: Ridge Classifier
|
|
Model func: RidgeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02440166 0.04039574 0.03362751 0.03191423 0.0341444 0.03159809
|
|
0.03414559 0.03350163 0.03697824 0.03455162]
|
|
|
|
mean value: 0.03352587223052979
|
|
|
|
key: score_time
|
|
value: [0.02671266 0.02270627 0.02497172 0.02473569 0.02427244 0.02613306
|
|
0.02413654 0.02417159 0.02538729 0.02572274]
|
|
|
|
mean value: 0.02489500045776367
|
|
|
|
key: test_mcc
|
|
value: [ 0.6125 0.63696156 0.80753845 0.48954403 0.48333333 0.68826048
|
|
0.14142136 0.48420012 -0.06726728 0.47087096]
|
|
|
|
mean value: 0.4747363012356044
|
|
|
|
key: train_mcc
|
|
value: [0.73937601 0.7456085 0.70223938 0.78190955 0.77474476 0.76102774
|
|
0.81167945 0.76819662 0.7826087 0.75393997]
|
|
|
|
mean value: 0.7621330687049908
|
|
|
|
key: test_accuracy
|
|
value: [0.80645161 0.80645161 0.90322581 0.74193548 0.74193548 0.83870968
|
|
0.56666667 0.73333333 0.46666667 0.73333333]
|
|
|
|
mean value: 0.7338709677419355
|
|
|
|
key: train_accuracy
|
|
value: [0.86909091 0.87272727 0.85090909 0.89090909 0.88727273 0.88
|
|
0.9057971 0.88405797 0.89130435 0.87681159]
|
|
|
|
mean value: 0.8808880105401845
|
|
|
|
key: test_fscore
|
|
value: [0.8 0.82352941 0.89655172 0.73333333 0.75 0.85714286
|
|
0.48 0.69230769 0.5 0.75 ]
|
|
|
|
mean value: 0.728286501868652
|
|
|
|
key: train_fscore
|
|
value: /home/tanu/git/LSHTM_analysis/scripts/ml/./pnca_cd_8020.py:176: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
rus_CT.sort_values(by = ['test_mcc'], ascending = False, inplace = True)
|
|
/home/tanu/git/LSHTM_analysis/scripts/ml/./pnca_cd_8020.py:179: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
rus_BT.sort_values(by = ['bts_mcc'], ascending = False, inplace = True)
|
|
[0.87323944 0.87455197 0.85409253 0.89130435 0.88808664 0.88256228
|
|
0.90647482 0.88489209 0.89130435 0.87857143]
|
|
|
|
mean value: 0.8825079885514047
|
|
|
|
key: test_precision
|
|
value: [0.8 0.73684211 0.92857143 0.78571429 0.75 0.78947368
|
|
0.6 0.81818182 0.47058824 0.70588235]
|
|
|
|
mean value: 0.7385253910176511
|
|
|
|
key: train_precision
|
|
value: [0.84931507 0.86524823 0.83916084 0.88489209 0.87857143 0.86111111
|
|
0.9 0.87857143 0.89130435 0.86619718]
|
|
|
|
mean value: 0.8714371720113927
|
|
|
|
key: test_recall
|
|
value: [0.8 0.93333333 0.86666667 0.6875 0.75 0.9375
|
|
0.4 0.6 0.53333333 0.8 ]
|
|
|
|
mean value: 0.7308333333333333
|
|
|
|
key: train_recall
|
|
value: [0.89855072 0.88405797 0.86956522 0.89781022 0.89781022 0.90510949
|
|
0.91304348 0.89130435 0.89130435 0.89130435]
|
|
|
|
mean value: 0.8939860361789908
|
|
|
|
key: test_roc_auc
|
|
value: [0.80625 0.81041667 0.90208333 0.74375 0.74166667 0.83541667
|
|
0.56666667 0.73333333 0.46666667 0.73333333]
|
|
|
|
mean value: 0.7339583333333334
|
|
|
|
key: train_roc_auc
|
|
value: [0.86898339 0.87268592 0.850841 0.89093409 0.88731091 0.88009098
|
|
0.9057971 0.88405797 0.89130435 0.87681159]
|
|
|
|
mean value: 0.880881730667513
|
|
|
|
key: test_jcc
|
|
value: [0.66666667 0.7 0.8125 0.57894737 0.6 0.75
|
|
0.31578947 0.52941176 0.33333333 0.6 ]
|
|
|
|
mean value: 0.5886648606811146
|
|
|
|
key: train_jcc
|
|
value: [0.775 0.77707006 0.74534161 0.80392157 0.7987013 0.78980892
|
|
0.82894737 0.79354839 0.80392157 0.78343949]
|
|
|
|
mean value: 0.789970027771844
|
|
|
|
MCC on Blind test: 0.5
|
|
|
|
Accuracy on Blind test: 0.75
|
|
|
|
Model_name: Ridge ClassifierCV
|
|
Model func: RidgeClassifierCV(cv=10)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifierCV(cv=10))])
|
|
|
|
key: fit_time
|
|
value: [0.32520008 0.24825335 0.25999808 0.25739646 0.28382564 0.27252793
|
|
0.29898787 0.33969402 0.27817726 0.25438118]
|
|
|
|
mean value: 0.2818441867828369
|
|
|
|
key: score_time
|
|
value: [0.0248785 0.02474403 0.02380967 0.0217061 0.02113008 0.02511883
|
|
0.02509761 0.02508569 0.02478576 0.02519155]
|
|
|
|
mean value: 0.02415478229522705
|
|
|
|
key: test_mcc
|
|
value: [ 0.6778302 0.43041423 0.48333333 0.29960206 0.54812195 0.6310315
|
|
0.13363062 0.40089186 -0.06726728 0.47087096]
|
|
|
|
mean value: 0.4008459443995318
|
|
|
|
key: train_mcc
|
|
value: [0.62278863 0.67273208 0.61526712 0.6445331 0.62189897 0.66576723
|
|
0.66673669 0.63045134 0.7826087 0.75393997]
|
|
|
|
mean value: 0.6676723820634367
|
|
|
|
key: test_accuracy
|
|
value: [0.83870968 0.70967742 0.74193548 0.64516129 0.77419355 0.80645161
|
|
0.56666667 0.7 0.46666667 0.73333333]
|
|
|
|
mean value: 0.6982795698924731
|
|
|
|
key: train_accuracy
|
|
value: [0.81090909 0.83636364 0.80727273 0.82181818 0.81090909 0.83272727
|
|
0.83333333 0.81521739 0.89130435 0.87681159]
|
|
|
|
mean value: 0.8336666666666667
|
|
|
|
key: test_fscore
|
|
value: [0.82758621 0.72727273 0.73333333 0.62068966 0.78787879 0.83333333
|
|
0.55172414 0.68965517 0.5 0.75 ]
|
|
|
|
mean value: 0.7021473354231975
|
|
|
|
key: train_fscore
|
|
value: [0.81690141 0.83754513 0.81272085 0.82562278 0.8115942 0.83453237
|
|
0.83453237 0.81588448 0.89130435 0.87857143]
|
|
|
|
mean value: 0.8359209362693545
|
|
|
|
key: test_precision
|
|
value: [0.85714286 0.66666667 0.73333333 0.69230769 0.76470588 0.75
|
|
0.57142857 0.71428571 0.47058824 0.70588235]
|
|
|
|
mean value: 0.692634130575307
|
|
|
|
key: train_precision
|
|
value: [0.79452055 0.83453237 0.79310345 0.80555556 0.8057554 0.82269504
|
|
0.82857143 0.81294964 0.89130435 0.86619718]
|
|
|
|
mean value: 0.8255184956805666
|
|
|
|
key: test_recall
|
|
value: [0.8 0.8 0.73333333 0.5625 0.8125 0.9375
|
|
0.53333333 0.66666667 0.53333333 0.8 ]
|
|
|
|
mean value: 0.7179166666666666
|
|
|
|
key: train_recall
|
|
value: [0.84057971 0.84057971 0.83333333 0.84671533 0.81751825 0.84671533
|
|
0.84057971 0.81884058 0.89130435 0.89130435]
|
|
|
|
mean value: 0.8467470644239924
|
|
|
|
key: test_roc_auc
|
|
value: [0.8375 0.7125 0.74166667 0.64791667 0.77291667 0.80208333
|
|
0.56666667 0.7 0.46666667 0.73333333]
|
|
|
|
mean value: 0.698125
|
|
|
|
key: train_roc_auc
|
|
value: [0.8108008 0.83634825 0.80717762 0.82190839 0.81093304 0.83277795
|
|
0.83333333 0.81521739 0.89130435 0.87681159]
|
|
|
|
mean value: 0.833661271554004
|
|
|
|
key: test_jcc
|
|
value: [0.70588235 0.57142857 0.57894737 0.45 0.65 0.71428571
|
|
0.38095238 0.52631579 0.33333333 0.6 ]
|
|
|
|
mean value: 0.5511145510835913
|
|
|
|
key: train_jcc
|
|
value: [0.69047619 0.72049689 0.68452381 0.7030303 0.68292683 0.71604938
|
|
0.71604938 0.68902439 0.80392157 0.78343949]
|
|
|
|
mean value: 0.7189938241457846
|
|
|
|
MCC on Blind test: 0.45
|
|
|
|
Accuracy on Blind test: 0.73
|
|
|
|
Model_name: Logistic Regression
|
|
Model func: LogisticRegression(random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegression(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.05534482 0.04458189 0.05736661 0.0531776 0.04012871 0.04258728
|
|
0.049196 0.04088068 0.04078937 0.0383327 ]
|
|
|
|
mean value: 0.04623856544494629
|
|
|
|
key: score_time
|
|
value: [0.01237965 0.01205087 0.01570582 0.0232656 0.01462054 0.01458383
|
|
0.01588368 0.01502609 0.01499796 0.01508451]
|
|
|
|
mean value: 0.015359854698181153
|
|
|
|
key: test_mcc
|
|
value: [0.63960215 0.47368421 0.51319869 0.57184997 0.6754386 0.51461988
|
|
0.40643275 0.56934383 0.40469382 0.62280702]
|
|
|
|
mean value: 0.5391670909734849
|
|
|
|
key: train_mcc
|
|
value: [0.67669524 0.71257485 0.67833746 0.66023295 0.68972407 0.64194542
|
|
0.68358581 0.66002286 0.67782533 0.65372826]
|
|
|
|
mean value: 0.6734672248197455
|
|
|
|
key: test_accuracy
|
|
value: [0.81578947 0.73684211 0.75675676 0.78378378 0.83783784 0.75675676
|
|
0.7027027 0.78378378 0.7027027 0.81081081]
|
|
|
|
mean value: 0.7687766714082503
|
|
|
|
key: train_accuracy
|
|
value: [0.83832335 0.85628743 0.83880597 0.82985075 0.84477612 0.82089552
|
|
0.84179104 0.82985075 0.83880597 0.82686567]
|
|
|
|
mean value: 0.836625256948789
|
|
|
|
key: test_fscore
|
|
value: [0.82926829 0.73684211 0.74285714 0.78947368 0.83333333 0.75675676
|
|
0.7027027 0.8 0.71794872 0.81081081]
|
|
|
|
mean value: 0.7719993546566075
|
|
|
|
key: train_fscore
|
|
value: [0.83928571 0.85628743 0.84302326 0.83381924 0.84705882 0.82352941
|
|
0.84084084 0.83185841 0.84023669 0.82634731]
|
|
|
|
mean value: 0.8382287112226234
|
|
|
|
key: test_precision
|
|
value: [0.77272727 0.73684211 0.76470588 0.75 0.83333333 0.73684211
|
|
0.72222222 0.76190476 0.7 0.83333333]
|
|
|
|
mean value: 0.761191101640018
|
|
|
|
key: train_precision
|
|
value: [0.83431953 0.85628743 0.82386364 0.81714286 0.8372093 0.81395349
|
|
0.84337349 0.81976744 0.83040936 0.82634731]
|
|
|
|
mean value: 0.8302673833931824
|
|
|
|
key: test_recall
|
|
value: [0.89473684 0.73684211 0.72222222 0.83333333 0.83333333 0.77777778
|
|
0.68421053 0.84210526 0.73684211 0.78947368]
|
|
|
|
mean value: 0.7850877192982456
|
|
|
|
key: train_recall
|
|
value: [0.84431138 0.85628743 0.86309524 0.85119048 0.85714286 0.83333333
|
|
0.83832335 0.84431138 0.8502994 0.82634731]
|
|
|
|
mean value: 0.8464642144282862
|
|
|
|
key: test_roc_auc
|
|
value: [0.81578947 0.73684211 0.75584795 0.78508772 0.8377193 0.75730994
|
|
0.70321637 0.78216374 0.70175439 0.81140351]
|
|
|
|
mean value: 0.7687134502923977
|
|
|
|
key: train_roc_auc
|
|
value: [0.83832335 0.85628743 0.83873325 0.82978685 0.84473909 0.82085828
|
|
0.84178072 0.82989378 0.83884018 0.82686413]
|
|
|
|
mean value: 0.8366107071571144
|
|
|
|
key: test_jcc
|
|
value: [0.70833333 0.58333333 0.59090909 0.65217391 0.71428571 0.60869565
|
|
0.54166667 0.66666667 0.56 0.68181818]
|
|
|
|
mean value: 0.6307882552230378
|
|
|
|
key: train_jcc
|
|
value: [0.72307692 0.7486911 0.72864322 0.715 0.73469388 0.7
|
|
0.7253886 0.71212121 0.7244898 0.70408163]
|
|
|
|
mean value: 0.7216186357913695
|
|
|
|
MCC on Blind test: 0.55
|
|
|
|
Accuracy on Blind test: 0.78
|
|
|
|
Model_name: Logistic RegressionCV
|
|
Model func: LogisticRegressionCV(random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegressionCV(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.08532763 0.86566377 1.48706555 1.45354915 1.482898 0.98251677
|
|
0.84934211 0.95717216 0.91341496 0.8904438 ]
|
|
|
|
mean value: 1.0967393875122071
|
|
|
|
key: score_time
|
|
value: [0.01735353 0.0173595 0.0123055 0.01229596 0.01246262 0.01285172
|
|
0.01553488 0.01550627 0.01223063 0.01213717]
|
|
|
|
mean value: 0.014003777503967285
|
|
|
|
key: test_mcc
|
|
value: [0.68803296 0.42163702 0.46019501 0.62280702 0.63129316 0.4633451
|
|
0.41299552 0.51793973 0.40469382 0.6754386 ]
|
|
|
|
mean value: 0.5298377940355362
|
|
|
|
key: train_mcc
|
|
value: [0.69462323 0.88662502 0.6480155 0.62471967 0.60007131 0.64247019
|
|
0.94641713 0.73169084 0.672267 0.56446556]
|
|
|
|
mean value: 0.7011365460048885
|
|
|
|
key: test_accuracy
|
|
value: [0.84210526 0.71052632 0.72972973 0.81081081 0.81081081 0.72972973
|
|
0.7027027 0.75675676 0.7027027 0.83783784]
|
|
|
|
mean value: 0.763371266002845
|
|
|
|
key: train_accuracy
|
|
value: [0.84730539 0.94311377 0.8238806 0.8119403 0.8 0.82089552
|
|
0.97313433 0.86567164 0.8358209 0.78208955]
|
|
|
|
mean value: 0.8503851997497542
|
|
|
|
key: test_fscore
|
|
value: [0.85 0.71794872 0.70588235 0.81081081 0.82051282 0.73684211
|
|
0.68571429 0.7804878 0.71794872 0.84210526]
|
|
|
|
mean value: 0.7668252879175631
|
|
|
|
key: train_fscore
|
|
value: [0.84776119 0.9439528 0.82697947 0.8173913 0.80235988 0.8255814
|
|
0.97280967 0.86725664 0.83870968 0.78466077]
|
|
|
|
mean value: 0.8527462799455923
|
|
|
|
key: test_precision
|
|
value: [0.80952381 0.7 0.75 0.78947368 0.76190476 0.7
|
|
0.75 0.72727273 0.7 0.84210526]
|
|
|
|
mean value: 0.7530280246069719
|
|
|
|
key: train_precision
|
|
value: [0.8452381 0.93023256 0.8150289 0.79661017 0.79532164 0.80681818
|
|
0.98170732 0.85465116 0.82183908 0.77325581]
|
|
|
|
mean value: 0.8420702918125469
|
|
|
|
key: test_recall
|
|
value: [0.89473684 0.73684211 0.66666667 0.83333333 0.88888889 0.77777778
|
|
0.63157895 0.84210526 0.73684211 0.84210526]
|
|
|
|
mean value: 0.7850877192982456
|
|
|
|
key: train_recall
|
|
value: [0.8502994 0.95808383 0.83928571 0.83928571 0.80952381 0.8452381
|
|
0.96407186 0.88023952 0.85628743 0.79640719]
|
|
|
|
mean value: 0.8638722554890219
|
|
|
|
key: test_roc_auc
|
|
value: [0.84210526 0.71052632 0.72807018 0.81140351 0.8128655 0.73099415
|
|
0.70467836 0.75438596 0.70175439 0.8377193 ]
|
|
|
|
mean value: 0.7634502923976608
|
|
|
|
key: train_roc_auc
|
|
value: [0.84730539 0.94311377 0.82383447 0.81185843 0.79997149 0.82082264
|
|
0.97310736 0.865715 0.83588181 0.78213216]
|
|
|
|
mean value: 0.850374251497006
|
|
|
|
key: test_jcc
|
|
value: [0.73913043 0.56 0.54545455 0.68181818 0.69565217 0.58333333
|
|
0.52173913 0.64 0.56 0.72727273]
|
|
|
|
mean value: 0.6254400527009223
|
|
|
|
key: train_jcc
|
|
value: [0.7357513 0.89385475 0.705 0.69117647 0.66995074 0.7029703
|
|
0.94705882 0.765625 0.72222222 0.64563107]
|
|
|
|
mean value: 0.7479240664187132
|
|
|
|
MCC on Blind test: 0.55
|
|
|
|
Accuracy on Blind test: 0.78
|
|
|
|
Model_name: Gaussian NB
|
|
Model func: GaussianNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01589036 0.01228857 0.01086211 0.01416421 0.01628971 0.0102284
|
|
0.01108384 0.00965357 0.00938272 0.01784778]
|
|
|
|
mean value: 0.012769126892089843
|
|
|
|
key: score_time
|
|
value: [0.01216507 0.01046753 0.00957394 0.01376891 0.01214242 0.00978446
|
|
0.00914025 0.00878954 0.00920725 0.0148778 ]
|
|
|
|
mean value: 0.010991716384887695
|
|
|
|
key: test_mcc
|
|
value: [ 0.33968311 0.32732684 0.41299552 0.53638795 0.44331728 0.34882767
|
|
0.29618896 0.24975622 -0.10530647 0.43165592]
|
|
|
|
mean value: 0.3280832999691107
|
|
|
|
key: train_mcc
|
|
value: [0.41503688 0.3843783 0.37627089 0.42782015 0.41081153 0.3681832
|
|
0.46130026 0.3602579 0.4495621 0.42626871]
|
|
|
|
mean value: 0.40798899164487934
|
|
|
|
key: test_accuracy
|
|
value: [0.65789474 0.65789474 0.7027027 0.75675676 0.7027027 0.64864865
|
|
0.64864865 0.62162162 0.45945946 0.7027027 ]
|
|
|
|
mean value: 0.6559032716927454
|
|
|
|
key: train_accuracy
|
|
value: [0.69760479 0.68263473 0.67761194 0.69850746 0.69552239 0.65970149
|
|
0.72835821 0.67164179 0.71343284 0.70149254]
|
|
|
|
mean value: 0.6926508177674502
|
|
|
|
key: test_fscore
|
|
value: [0.71111111 0.69767442 0.71794872 0.7804878 0.74418605 0.71111111
|
|
0.66666667 0.68181818 0.56521739 0.75555556]
|
|
|
|
mean value: 0.703177700551002
|
|
|
|
key: train_fscore
|
|
value: [0.73766234 0.7253886 0.7244898 0.74686717 0.7371134 0.72857143
|
|
0.74509804 0.71354167 0.75129534 0.74226804]
|
|
|
|
mean value: 0.7352295817077089
|
|
|
|
key: test_precision
|
|
value: [0.61538462 0.625 0.66666667 0.69565217 0.64 0.59259259
|
|
0.65 0.6 0.48148148 0.65384615]
|
|
|
|
mean value: 0.6220623683884553
|
|
|
|
key: train_precision
|
|
value: [0.65137615 0.63926941 0.63392857 0.64502165 0.65 0.60714286
|
|
0.7 0.63133641 0.66210046 0.65158371]
|
|
|
|
mean value: 0.6471759199332957
|
|
|
|
key: test_recall
|
|
value: [0.84210526 0.78947368 0.77777778 0.88888889 0.88888889 0.88888889
|
|
0.68421053 0.78947368 0.68421053 0.89473684]
|
|
|
|
mean value: 0.8128654970760234
|
|
|
|
key: train_recall
|
|
value: [0.8502994 0.83832335 0.8452381 0.88690476 0.85119048 0.91071429
|
|
0.79640719 0.82035928 0.86826347 0.86227545]
|
|
|
|
mean value: 0.8529975762760194
|
|
|
|
key: test_roc_auc
|
|
value: [0.65789474 0.65789474 0.70467836 0.76023392 0.70760234 0.65497076
|
|
0.64766082 0.61695906 0.45321637 0.69736842]
|
|
|
|
mean value: 0.6558479532163742
|
|
|
|
key: train_roc_auc
|
|
value: [0.69760479 0.68263473 0.67711007 0.6979434 0.69505632 0.65894996
|
|
0.72856074 0.6720844 0.71389364 0.70197106]
|
|
|
|
mean value: 0.6925809096093527
|
|
|
|
key: test_jcc
|
|
value: [0.55172414 0.53571429 0.56 0.64 0.59259259 0.55172414
|
|
0.5 0.51724138 0.39393939 0.60714286]
|
|
|
|
mean value: 0.5450078784561543
|
|
|
|
key: train_jcc
|
|
value: [0.58436214 0.56910569 0.568 0.596 0.58367347 0.57303371
|
|
0.59375 0.55465587 0.60165975 0.59016393]
|
|
|
|
mean value: 0.5814404564136447
|
|
|
|
MCC on Blind test: 0.41
|
|
|
|
Accuracy on Blind test: 0.71
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.00999331 0.0103929 0.01702547 0.01013327 0.01034355 0.01006222
|
|
0.010252 0.01605487 0.01639366 0.01025558]
|
|
|
|
mean value: 0.012090682983398438
|
|
|
|
key: score_time
|
|
value: [0.00887465 0.00882936 0.01495981 0.00920868 0.00943518 0.00888228
|
|
0.00906563 0.01510024 0.01130962 0.00981021]
|
|
|
|
mean value: 0.010547566413879394
|
|
|
|
key: test_mcc
|
|
value: [0.69989647 0.15789474 0.35104619 0.37654316 0.45906433 0.29824561
|
|
0.51461988 0.52960948 0.18980224 0.45906433]
|
|
|
|
mean value: 0.4035786440813924
|
|
|
|
key: train_mcc
|
|
value: [0.4616078 0.51609084 0.47615806 0.46924483 0.47721894 0.49316404
|
|
0.49868094 0.49279758 0.52256686 0.45201983]
|
|
|
|
mean value: 0.48595497172512575
|
|
|
|
key: test_accuracy
|
|
value: [0.84210526 0.57894737 0.67567568 0.67567568 0.72972973 0.64864865
|
|
0.75675676 0.75675676 0.59459459 0.72972973]
|
|
|
|
mean value: 0.6988620199146515
|
|
|
|
key: train_accuracy
|
|
value: [0.73053892 0.75748503 0.73731343 0.73432836 0.73731343 0.74626866
|
|
0.74925373 0.74626866 0.76119403 0.72537313]
|
|
|
|
mean value: 0.742533738493163
|
|
|
|
key: test_fscore
|
|
value: [0.85714286 0.57894737 0.64705882 0.71428571 0.72222222 0.64864865
|
|
0.75675676 0.79069767 0.65116279 0.73684211]
|
|
|
|
mean value: 0.71037649613861
|
|
|
|
key: train_fscore
|
|
value: [0.73684211 0.76521739 0.74857143 0.74202899 0.75141243 0.75362319
|
|
0.75147929 0.74926254 0.76331361 0.73410405]
|
|
|
|
mean value: 0.7495855010954724
|
|
|
|
key: test_precision
|
|
value: [0.7826087 0.57894737 0.6875 0.625 0.72222222 0.63157895
|
|
0.77777778 0.70833333 0.58333333 0.73684211]
|
|
|
|
mean value: 0.6834143783371472
|
|
|
|
key: train_precision
|
|
value: [0.72 0.74157303 0.71978022 0.72316384 0.71505376 0.73446328
|
|
0.74269006 0.73837209 0.75438596 0.70949721]
|
|
|
|
mean value: 0.7298979458691992
|
|
|
|
key: test_recall
|
|
value: [0.94736842 0.57894737 0.61111111 0.83333333 0.72222222 0.66666667
|
|
0.73684211 0.89473684 0.73684211 0.73684211]
|
|
|
|
mean value: 0.7464912280701754
|
|
|
|
key: train_recall
|
|
value: [0.75449102 0.79041916 0.7797619 0.76190476 0.79166667 0.77380952
|
|
0.76047904 0.76047904 0.77245509 0.76047904]
|
|
|
|
mean value: 0.7705945252352437
|
|
|
|
key: test_roc_auc
|
|
value: [0.84210526 0.57894737 0.67397661 0.67982456 0.72953216 0.64912281
|
|
0.75730994 0.75292398 0.59064327 0.72953216]
|
|
|
|
mean value: 0.6983918128654971
|
|
|
|
key: train_roc_auc
|
|
value: [0.73053892 0.75748503 0.73718634 0.73424579 0.7371507 0.7461862
|
|
0.74928714 0.74631095 0.76122754 0.72547762]
|
|
|
|
mean value: 0.742509623609923
|
|
|
|
key: test_jcc
|
|
value: [0.75 0.40740741 0.47826087 0.55555556 0.56521739 0.48
|
|
0.60869565 0.65384615 0.48275862 0.58333333]
|
|
|
|
mean value: 0.5565074983875584
|
|
|
|
key: train_jcc
|
|
value: [0.58333333 0.61971831 0.59817352 0.58986175 0.60180995 0.60465116
|
|
0.60189573 0.5990566 0.61722488 0.57990868]
|
|
|
|
mean value: 0.5995633922420729
|
|
|
|
MCC on Blind test: 0.4
|
|
|
|
Accuracy on Blind test: 0.71
|
|
|
|
Model_name: K-Nearest Neighbors
|
|
Model func: KNeighborsClassifier()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', KNeighborsClassifier())])
|
|
|
|
key: fit_time
|
|
value: [0.01021886 0.01016688 0.01081252 0.010288 0.01636624 0.01065087
|
|
0.01019359 0.01151347 0.01095486 0.01698399]
|
|
|
|
mean value: 0.01181492805480957
|
|
|
|
key: score_time
|
|
value: [0.01703095 0.04109097 0.01757669 0.01684403 0.04068542 0.01792574
|
|
0.03453708 0.0188992 0.0181427 0.03425932]
|
|
|
|
mean value: 0.025699210166931153
|
|
|
|
key: test_mcc
|
|
value: [ 0.26462806 0.21081851 0.24189738 -0.07673883 0.35104619 0.13274856
|
|
-0.07653316 0.13259028 -0.03274332 0.35087719]
|
|
|
|
mean value: 0.14985908708663487
|
|
|
|
key: train_mcc
|
|
value: [0.51497929 0.51520103 0.48293024 0.54656694 0.55954896 0.55954896
|
|
0.58214222 0.49263894 0.570858 0.45164458]
|
|
|
|
mean value: 0.5276059166470451
|
|
|
|
key: test_accuracy
|
|
value: [0.63157895 0.60526316 0.62162162 0.45945946 0.67567568 0.56756757
|
|
0.45945946 0.56756757 0.48648649 0.67567568]
|
|
|
|
mean value: 0.5750355618776671
|
|
|
|
key: train_accuracy
|
|
value: [0.75748503 0.75748503 0.74029851 0.77313433 0.77910448 0.77910448
|
|
0.79104478 0.74626866 0.78507463 0.72537313]
|
|
|
|
mean value: 0.7634373044954866
|
|
|
|
key: test_fscore
|
|
value: [0.65 0.61538462 0.58823529 0.54545455 0.64705882 0.52941176
|
|
0.375 0.61904762 0.53658537 0.68421053]
|
|
|
|
mean value: 0.5790388554409169
|
|
|
|
key: train_fscore
|
|
value: [0.75820896 0.76106195 0.75354108 0.77777778 0.78735632 0.78735632
|
|
0.79166667 0.74777448 0.78947368 0.73255814]
|
|
|
|
mean value: 0.7686775371193969
|
|
|
|
key: test_precision
|
|
value: [0.61904762 0.6 0.625 0.46153846 0.6875 0.5625
|
|
0.46153846 0.56521739 0.5 0.68421053]
|
|
|
|
mean value: 0.5766552459744679
|
|
|
|
key: train_precision
|
|
value: [0.75595238 0.75 0.71891892 0.76436782 0.76111111 0.76111111
|
|
0.78698225 0.74117647 0.77142857 0.71186441]
|
|
|
|
mean value: 0.7522913035502654
|
|
|
|
key: test_recall
|
|
value: [0.68421053 0.63157895 0.55555556 0.66666667 0.61111111 0.5
|
|
0.31578947 0.68421053 0.57894737 0.68421053]
|
|
|
|
mean value: 0.5912280701754387
|
|
|
|
key: train_recall
|
|
value: [0.76047904 0.77245509 0.79166667 0.79166667 0.81547619 0.81547619
|
|
0.79640719 0.75449102 0.80838323 0.75449102]
|
|
|
|
mean value: 0.7860992301112062
|
|
|
|
key: test_roc_auc
|
|
value: [0.63157895 0.60526316 0.61988304 0.46491228 0.67397661 0.56578947
|
|
0.46345029 0.56432749 0.48391813 0.6754386 ]
|
|
|
|
mean value: 0.5748538011695907
|
|
|
|
key: train_roc_auc
|
|
value: [0.75748503 0.75748503 0.74014471 0.77307884 0.77899558 0.77899558
|
|
0.79106074 0.74629313 0.785144 0.72545979]
|
|
|
|
mean value: 0.763414242942686
|
|
|
|
key: test_jcc
|
|
value: [0.48148148 0.44444444 0.41666667 0.375 0.47826087 0.36
|
|
0.23076923 0.44827586 0.36666667 0.52 ]
|
|
|
|
mean value: 0.4121565221662673
|
|
|
|
key: train_jcc
|
|
value: [0.61057692 0.61428571 0.60454545 0.63636364 0.6492891 0.6492891
|
|
0.65517241 0.5971564 0.65217391 0.57798165]
|
|
|
|
mean value: 0.6246834303640855
|
|
|
|
MCC on Blind test: 0.23
|
|
|
|
Accuracy on Blind test: 0.61
|
|
|
|
Model_name: SVM
|
|
Model func: SVC(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SVC(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02011538 0.0159595 0.01646566 0.01649833 0.01891685 0.01807475
|
|
0.01885533 0.01860213 0.01907539 0.01919794]
|
|
|
|
mean value: 0.01817612648010254
|
|
|
|
key: score_time
|
|
value: [0.01252246 0.01074481 0.01097894 0.01090217 0.01209044 0.01190162
|
|
0.01178312 0.01202965 0.01165557 0.01199579]
|
|
|
|
mean value: 0.011660456657409668
|
|
|
|
key: test_mcc
|
|
value: [0.69989647 0.42163702 0.51319869 0.58342636 0.68035483 0.47328975
|
|
0.47328975 0.62807634 0.35104619 0.6754386 ]
|
|
|
|
mean value: 0.5499654009161884
|
|
|
|
key: train_mcc
|
|
value: [0.68986066 0.71867885 0.70190853 0.68358581 0.69561499 0.68367203
|
|
0.68367203 0.70155053 0.72006441 0.67165669]
|
|
|
|
mean value: 0.6950264516570681
|
|
|
|
key: test_accuracy
|
|
value: [0.84210526 0.71052632 0.75675676 0.78378378 0.83783784 0.72972973
|
|
0.72972973 0.81081081 0.67567568 0.83783784]
|
|
|
|
mean value: 0.771479374110953
|
|
|
|
key: train_accuracy
|
|
value: [0.84431138 0.85928144 0.85074627 0.84179104 0.84776119 0.84179104
|
|
0.84179104 0.85074627 0.85970149 0.8358209 ]
|
|
|
|
mean value: 0.8473742068102601
|
|
|
|
key: test_fscore
|
|
value: [0.85714286 0.7027027 0.74285714 0.8 0.84210526 0.75
|
|
0.70588235 0.82926829 0.7 0.84210526]
|
|
|
|
mean value: 0.7772063874642595
|
|
|
|
key: train_fscore
|
|
value: [0.84883721 0.86053412 0.85380117 0.84272997 0.84955752 0.84365782
|
|
0.83987915 0.85119048 0.86217009 0.8358209 ]
|
|
|
|
mean value: 0.8488178426849451
|
|
|
|
key: test_precision
|
|
value: [0.7826087 0.72222222 0.76470588 0.72727273 0.8 0.68181818
|
|
0.8 0.77272727 0.66666667 0.84210526]
|
|
|
|
mean value: 0.7560126911870081
|
|
|
|
key: train_precision
|
|
value: [0.82485876 0.85294118 0.83908046 0.84023669 0.84210526 0.83625731
|
|
0.84756098 0.84615385 0.84482759 0.83333333]
|
|
|
|
mean value: 0.840735539409663
|
|
|
|
key: test_recall
|
|
value: [0.94736842 0.68421053 0.72222222 0.88888889 0.88888889 0.83333333
|
|
0.63157895 0.89473684 0.73684211 0.84210526]
|
|
|
|
mean value: 0.8070175438596491
|
|
|
|
key: train_recall
|
|
value: [0.8742515 0.86826347 0.86904762 0.8452381 0.85714286 0.85119048
|
|
0.83233533 0.85628743 0.88023952 0.83832335]
|
|
|
|
mean value: 0.8572319646421442
|
|
|
|
key: test_roc_auc
|
|
value: [0.84210526 0.71052632 0.75584795 0.78654971 0.83918129 0.73245614
|
|
0.73245614 0.80847953 0.67397661 0.8377193 ]
|
|
|
|
mean value: 0.7719298245614036
|
|
|
|
key: train_roc_auc
|
|
value: [0.84431138 0.85928144 0.85069147 0.84178072 0.84773311 0.8417629
|
|
0.8417629 0.85076276 0.85976262 0.83582834]
|
|
|
|
mean value: 0.8473677644710579
|
|
|
|
key: test_jcc
|
|
value: [0.75 0.54166667 0.59090909 0.66666667 0.72727273 0.6
|
|
0.54545455 0.70833333 0.53846154 0.72727273]
|
|
|
|
mean value: 0.6396037296037296
|
|
|
|
key: train_jcc
|
|
value: [0.73737374 0.75520833 0.74489796 0.72820513 0.73846154 0.72959184
|
|
0.72395833 0.74093264 0.75773196 0.71794872]
|
|
|
|
mean value: 0.7374310185824089
|
|
|
|
MCC on Blind test: 0.35
|
|
|
|
Accuracy on Blind test: 0.68
|
|
|
|
Model_name: MLP
|
|
Model func: MLPClassifier(max_iter=500, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MLPClassifier(max_iter=500, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.41314268 1.27847314 1.36850953 1.2497642 1.4931643 1.47693133
|
|
1.34268689 1.40755868 1.42651916 1.38605094]
|
|
|
|
mean value: 1.3842800855636597
|
|
|
|
key: score_time
|
|
value: [0.01228833 0.01226258 0.02383208 0.01257062 0.01577306 0.02474785
|
|
0.01533437 0.01541162 0.0157671 0.02361655]
|
|
|
|
mean value: 0.017160415649414062
|
|
|
|
key: test_mcc
|
|
value: [0.79388419 0.52704628 0.78362573 0.51461988 0.6754386 0.35087719
|
|
0.19469789 0.51793973 0.24408665 0.68035483]
|
|
|
|
mean value: 0.5282570968231516
|
|
|
|
key: train_mcc
|
|
value: [0.9940298 0.97021644 0.98210658 0.97611919 0.97030305 0.97016575
|
|
1. 0.97030621 0.9880596 0.9705841 ]
|
|
|
|
mean value: 0.9791890713615404
|
|
|
|
key: test_accuracy
|
|
value: [0.89473684 0.76315789 0.89189189 0.75675676 0.83783784 0.67567568
|
|
0.59459459 0.75675676 0.62162162 0.83783784]
|
|
|
|
mean value: 0.7630867709815078
|
|
|
|
key: train_accuracy
|
|
value: [0.99700599 0.98502994 0.99104478 0.9880597 0.98507463 0.98507463
|
|
1. 0.98507463 0.99402985 0.98507463]
|
|
|
|
mean value: 0.9895468763964608
|
|
|
|
key: test_fscore
|
|
value: [0.88888889 0.76923077 0.88888889 0.75675676 0.83333333 0.66666667
|
|
0.57142857 0.7804878 0.66666667 0.83333333]
|
|
|
|
mean value: 0.7655681680071924
|
|
|
|
key: train_fscore
|
|
value: [0.99701493 0.9851632 0.99109792 0.98809524 0.98525074 0.9851632
|
|
1. 0.9851632 0.99401198 0.98525074]
|
|
|
|
mean value: 0.9896211151534519
|
|
|
|
key: test_precision
|
|
value: [0.94117647 0.75 0.88888889 0.73684211 0.83333333 0.66666667
|
|
0.625 0.72727273 0.60869565 0.88235294]
|
|
|
|
mean value: 0.7660228785363393
|
|
|
|
key: train_precision
|
|
value: [0.99404762 0.97647059 0.98816568 0.98809524 0.97660819 0.98224852
|
|
1. 0.97647059 0.99401198 0.97093023]
|
|
|
|
mean value: 0.9847048630537424
|
|
|
|
key: test_recall
|
|
value: [0.84210526 0.78947368 0.88888889 0.77777778 0.83333333 0.66666667
|
|
0.52631579 0.84210526 0.73684211 0.78947368]
|
|
|
|
mean value: 0.769298245614035
|
|
|
|
key: train_recall
|
|
value: [1. 0.99401198 0.99404762 0.98809524 0.99404762 0.98809524
|
|
1. 0.99401198 0.99401198 1. ]
|
|
|
|
mean value: 0.9946321642429427
|
|
|
|
key: test_roc_auc
|
|
value: [0.89473684 0.76315789 0.89181287 0.75730994 0.8377193 0.6754386
|
|
0.59649123 0.75438596 0.61842105 0.83918129]
|
|
|
|
mean value: 0.7628654970760235
|
|
|
|
key: train_roc_auc
|
|
value: [0.99700599 0.98502994 0.99103579 0.9880596 0.98504776 0.98506558
|
|
1. 0.98510123 0.9940298 0.98511905]
|
|
|
|
mean value: 0.9895494724836043
|
|
|
|
key: test_jcc
|
|
value: [0.8 0.625 0.8 0.60869565 0.71428571 0.5
|
|
0.4 0.64 0.5 0.71428571]
|
|
|
|
mean value: 0.6302267080745342
|
|
|
|
key: train_jcc
|
|
value: [0.99404762 0.97076023 0.98235294 0.97647059 0.97093023 0.97076023
|
|
1. 0.97076023 0.98809524 0.97093023]
|
|
|
|
mean value: 0.9795107553425286
|
|
|
|
MCC on Blind test: 0.5
|
|
|
|
Accuracy on Blind test: 0.75
|
|
|
|
Model_name: Decision Tree
|
|
Model func: DecisionTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', DecisionTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.03163719 0.02367377 0.02250123 0.02224255 0.02085638 0.02292895
|
|
0.02223945 0.02356005 0.02180123 0.02279878]
|
|
|
|
mean value: 0.02342395782470703
|
|
|
|
key: score_time
|
|
value: [0.01199412 0.00928497 0.00918674 0.00986052 0.00882602 0.00926352
|
|
0.0089097 0.00884819 0.00900483 0.00924397]
|
|
|
|
mean value: 0.00944225788116455
|
|
|
|
key: test_mcc
|
|
value: [0.43643578 0.58218174 0.46019501 0.40469382 0.6754386 0.46019501
|
|
0.30384671 0.74044197 0.35087719 0.63129316]
|
|
|
|
mean value: 0.5045598988460415
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.71052632 0.78947368 0.72972973 0.7027027 0.83783784 0.72972973
|
|
0.64864865 0.86486486 0.67567568 0.81081081]
|
|
|
|
mean value: 0.75
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.66666667 0.77777778 0.70588235 0.68571429 0.83333333 0.70588235
|
|
0.62857143 0.85714286 0.68421053 0.8 ]
|
|
|
|
mean value: 0.7345181581404492
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.78571429 0.82352941 0.75 0.70588235 0.83333333 0.75
|
|
0.6875 0.9375 0.68421053 0.875 ]
|
|
|
|
mean value: 0.7832669910069291
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.57894737 0.73684211 0.66666667 0.66666667 0.83333333 0.66666667
|
|
0.57894737 0.78947368 0.68421053 0.73684211]
|
|
|
|
mean value: 0.6938596491228071
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.71052632 0.78947368 0.72807018 0.70175439 0.8377193 0.72807018
|
|
0.6505848 0.86695906 0.6754386 0.8128655 ]
|
|
|
|
mean value: 0.7501461988304093
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.5 0.63636364 0.54545455 0.52173913 0.71428571 0.54545455
|
|
0.45833333 0.75 0.52 0.66666667]
|
|
|
|
mean value: 0.5858297571993224
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.38
|
|
|
|
Accuracy on Blind test: 0.69
|
|
|
|
Model_name: Extra Trees
|
|
Model func: ExtraTreesClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreesClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.12009811 0.12002826 0.11910319 0.11681914 0.1155467 0.11768603
|
|
0.11389899 0.11417508 0.11645961 0.11742949]
|
|
|
|
mean value: 0.11712446212768554
|
|
|
|
key: score_time
|
|
value: [0.01999784 0.01961231 0.01888847 0.01908422 0.01846051 0.01872778
|
|
0.01780486 0.01878047 0.01897168 0.01792836]
|
|
|
|
mean value: 0.018825650215148926
|
|
|
|
key: test_mcc
|
|
value: [0.52704628 0.32732684 0.73821295 0.63129316 0.62170355 0.51461988
|
|
0.24269006 0.66826731 0.35558302 0.58342636]
|
|
|
|
mean value: 0.5210169404979561
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.76315789 0.65789474 0.86486486 0.81081081 0.81081081 0.75675676
|
|
0.62162162 0.81081081 0.67567568 0.78378378]
|
|
|
|
mean value: 0.7556187766714083
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.76923077 0.60606061 0.84848485 0.82051282 0.8 0.75675676
|
|
0.63157895 0.84444444 0.71428571 0.76470588]
|
|
|
|
mean value: 0.7556060789497322
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.75 0.71428571 0.93333333 0.76190476 0.82352941 0.73684211
|
|
0.63157895 0.73076923 0.65217391 0.86666667]
|
|
|
|
mean value: 0.760108408439947
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.78947368 0.52631579 0.77777778 0.88888889 0.77777778 0.77777778
|
|
0.63157895 1. 0.78947368 0.68421053]
|
|
|
|
mean value: 0.764327485380117
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.76315789 0.65789474 0.8625731 0.8128655 0.80994152 0.75730994
|
|
0.62134503 0.80555556 0.67251462 0.78654971]
|
|
|
|
mean value: 0.7549707602339182
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.625 0.43478261 0.73684211 0.69565217 0.66666667 0.60869565
|
|
0.46153846 0.73076923 0.55555556 0.61904762]
|
|
|
|
mean value: 0.61345500736233
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.35
|
|
|
|
Accuracy on Blind test: 0.68
|
|
|
|
Model_name: Extra Tree
|
|
Model func: ExtraTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01001716 0.0102427 0.01120782 0.01007128 0.01148486 0.01006961
|
|
0.00966978 0.00973654 0.00998855 0.01113558]
|
|
|
|
mean value: 0.010362386703491211
|
|
|
|
key: score_time
|
|
value: [0.00965428 0.0090642 0.00921178 0.00883627 0.01021385 0.009233
|
|
0.00957894 0.00886178 0.00884724 0.00882649]
|
|
|
|
mean value: 0.009232783317565918
|
|
|
|
key: test_mcc
|
|
value: [ 0.21821789 0.26919095 0.62807634 0.30307132 0.35104619 0.25997534
|
|
0.25301653 0.32780503 -0.03274332 0.29824561]
|
|
|
|
mean value: 0.28759019039690875
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.60526316 0.63157895 0.81081081 0.64864865 0.67567568 0.62162162
|
|
0.62162162 0.64864865 0.48648649 0.64864865]
|
|
|
|
mean value: 0.639900426742532
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.54545455 0.58823529 0.78787879 0.58064516 0.64705882 0.5
|
|
0.58823529 0.58064516 0.53658537 0.64864865]
|
|
|
|
mean value: 0.6003387082180991
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.64285714 0.66666667 0.86666667 0.69230769 0.6875 0.7
|
|
0.66666667 0.75 0.5 0.66666667]
|
|
|
|
mean value: 0.6839331501831502
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.47368421 0.52631579 0.72222222 0.5 0.61111111 0.38888889
|
|
0.52631579 0.47368421 0.57894737 0.63157895]
|
|
|
|
mean value: 0.5432748538011696
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.60526316 0.63157895 0.80847953 0.64473684 0.67397661 0.61549708
|
|
0.62426901 0.65350877 0.48391813 0.64912281]
|
|
|
|
mean value: 0.6390350877192982
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.375 0.41666667 0.65 0.40909091 0.47826087 0.33333333
|
|
0.41666667 0.40909091 0.36666667 0.48 ]
|
|
|
|
mean value: 0.4334776021080369
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.42
|
|
|
|
Accuracy on Blind test: 0.72
|
|
|
|
Model_name: Random Forest
|
|
Model func: RandomForestClassifier(n_estimators=1000, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(n_estimators=1000, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.60507369 1.62998867 1.6802237 1.65969014 1.63140559 1.62670565
|
|
1.62145782 1.6118865 1.636415 1.69331813]
|
|
|
|
mean value: 1.6396164894104004
|
|
|
|
key: score_time
|
|
value: [0.09210968 0.09264421 0.09607458 0.09379959 0.09156585 0.09922385
|
|
0.0952518 0.09469843 0.09818506 0.09887791]
|
|
|
|
mean value: 0.09524309635162354
|
|
|
|
key: test_mcc
|
|
value: [0.68421053 0.65465367 0.64287856 0.62280702 0.56934383 0.40643275
|
|
0.41299552 0.73099415 0.40469382 0.74044197]
|
|
|
|
mean value: 0.5869451826148115
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.84210526 0.81578947 0.81081081 0.81081081 0.78378378 0.7027027
|
|
0.7027027 0.86486486 0.7027027 0.86486486]
|
|
|
|
mean value: 0.7901137980085349
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.84210526 0.78787879 0.77419355 0.81081081 0.76470588 0.7027027
|
|
0.68571429 0.86486486 0.71794872 0.85714286]
|
|
|
|
mean value: 0.780806772096096
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.84210526 0.92857143 0.92307692 0.78947368 0.8125 0.68421053
|
|
0.75 0.88888889 0.7 0.9375 ]
|
|
|
|
mean value: 0.8256326714221451
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.84210526 0.68421053 0.66666667 0.83333333 0.72222222 0.72222222
|
|
0.63157895 0.84210526 0.73684211 0.78947368]
|
|
|
|
mean value: 0.7470760233918129
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.84210526 0.81578947 0.80701754 0.81140351 0.78216374 0.70321637
|
|
0.70467836 0.86549708 0.70175439 0.86695906]
|
|
|
|
mean value: 0.7900584795321638
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.72727273 0.65 0.63157895 0.68181818 0.61904762 0.54166667
|
|
0.52173913 0.76190476 0.56 0.75 ]
|
|
|
|
mean value: 0.644502803451316
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.62
|
|
|
|
Accuracy on Blind test: 0.81
|
|
|
|
Model_name: Random Forest2
|
|
Model func: RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'Z...05', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10,
|
|
oob_score=True, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.920614 0.92041969 0.92466736 0.9429481 0.94805288 0.9522264
|
|
0.93666458 1.07816815 0.954494 0.94619513]
|
|
|
|
mean value: 0.9524450302124023
|
|
|
|
key: score_time
|
|
value: [0.22036052 0.21733069 0.27829409 0.25204992 0.27903605 0.23214865
|
|
0.27200103 0.21072578 0.26293111 0.28287148]
|
|
|
|
mean value: 0.2507749319076538
|
|
|
|
key: test_mcc
|
|
value: [0.63245553 0.59222009 0.62807634 0.68035483 0.51319869 0.4633451
|
|
0.41299552 0.6754386 0.35087719 0.7888597 ]
|
|
|
|
mean value: 0.5737821592302272
|
|
|
|
key: train_mcc
|
|
value: [0.89247161 0.89247161 0.88065448 0.90504752 0.89259616 0.8986453
|
|
0.90506771 0.89312092 0.88066298 0.89260381]
|
|
|
|
mean value: 0.8933342103509413
|
|
|
|
key: test_accuracy
|
|
value: [0.81578947 0.78947368 0.81081081 0.83783784 0.75675676 0.72972973
|
|
0.7027027 0.83783784 0.67567568 0.89189189]
|
|
|
|
mean value: 0.784850640113798
|
|
|
|
key: train_accuracy
|
|
value: [0.94610778 0.94610778 0.94029851 0.95223881 0.94626866 0.94925373
|
|
0.95223881 0.94626866 0.94029851 0.94626866]
|
|
|
|
mean value: 0.9465349897220484
|
|
|
|
key: test_fscore
|
|
value: [0.82051282 0.76470588 0.78787879 0.84210526 0.74285714 0.73684211
|
|
0.68571429 0.84210526 0.68421053 0.88888889]
|
|
|
|
mean value: 0.7795820966099604
|
|
|
|
key: train_fscore
|
|
value: [0.94674556 0.94674556 0.9408284 0.95321637 0.94674556 0.94985251
|
|
0.95294118 0.94705882 0.94047619 0.94642857]
|
|
|
|
mean value: 0.9471038732305795
|
|
|
|
key: test_precision
|
|
value: [0.8 0.86666667 0.86666667 0.8 0.76470588 0.7
|
|
0.75 0.84210526 0.68421053 0.94117647]
|
|
|
|
mean value: 0.8015531475748194
|
|
|
|
key: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
train_precision
|
|
value: [0.93567251 0.93567251 0.93529412 0.93678161 0.94117647 0.94152047
|
|
0.93641618 0.93063584 0.93491124 0.9408284 ]
|
|
|
|
mean value: 0.9368909362598521
|
|
|
|
key: test_recall
|
|
value: [0.84210526 0.68421053 0.72222222 0.88888889 0.72222222 0.77777778
|
|
0.63157895 0.84210526 0.68421053 0.84210526]
|
|
|
|
mean value: 0.7637426900584795
|
|
|
|
key: train_recall
|
|
value: [0.95808383 0.95808383 0.94642857 0.9702381 0.95238095 0.95833333
|
|
0.97005988 0.96407186 0.94610778 0.95209581]
|
|
|
|
mean value: 0.9575883946392928
|
|
|
|
key: test_roc_auc
|
|
value: [0.81578947 0.78947368 0.80847953 0.83918129 0.75584795 0.73099415
|
|
0.70467836 0.8377193 0.6754386 0.89327485]
|
|
|
|
mean value: 0.7850877192982456
|
|
|
|
key: train_roc_auc
|
|
value: [0.94610778 0.94610778 0.94028015 0.95218492 0.94625036 0.94922655
|
|
0.95229184 0.94632164 0.9403158 0.946286 ]
|
|
|
|
mean value: 0.9465372825777018
|
|
|
|
key: test_jcc
|
|
value: [0.69565217 0.61904762 0.65 0.72727273 0.59090909 0.58333333
|
|
0.52173913 0.72727273 0.52 0.8 ]
|
|
|
|
mean value: 0.6435226802183324
|
|
|
|
key: train_jcc
|
|
value: [0.8988764 0.8988764 0.88826816 0.91061453 0.8988764 0.90449438
|
|
0.91011236 0.89944134 0.88764045 0.89830508]
|
|
|
|
mean value: 0.8995505511586513
|
|
|
|
MCC on Blind test: 0.62
|
|
|
|
Accuracy on Blind test: 0.81
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.02609611 0.01061249 0.01086545 0.01080394 0.01096463 0.0109055
|
|
0.01082563 0.01109815 0.01089931 0.01093006]
|
|
|
|
mean value: 0.012400126457214356
|
|
|
|
key: score_time
|
|
value: [0.01182461 0.00930953 0.00980473 0.00963306 0.00965619 0.00978112
|
|
0.00959444 0.0097003 0.00971866 0.00967646]
|
|
|
|
mean value: 0.009869909286499024
|
|
|
|
key: test_mcc
|
|
value: [0.69989647 0.15789474 0.35104619 0.37654316 0.45906433 0.29824561
|
|
0.51461988 0.52960948 0.18980224 0.45906433]
|
|
|
|
mean value: 0.4035786440813924
|
|
|
|
key: train_mcc
|
|
value: [0.4616078 0.51609084 0.47615806 0.46924483 0.47721894 0.49316404
|
|
0.49868094 0.49279758 0.52256686 0.45201983]
|
|
|
|
mean value: 0.48595497172512575
|
|
|
|
key: test_accuracy
|
|
value: [0.84210526 0.57894737 0.67567568 0.67567568 0.72972973 0.64864865
|
|
0.75675676 0.75675676 0.59459459 0.72972973]
|
|
|
|
mean value: 0.6988620199146515
|
|
|
|
key: train_accuracy
|
|
value: [0.73053892 0.75748503 0.73731343 0.73432836 0.73731343 0.74626866
|
|
0.74925373 0.74626866 0.76119403 0.72537313]
|
|
|
|
mean value: 0.742533738493163
|
|
|
|
key: test_fscore
|
|
value: [0.85714286 0.57894737 0.64705882 0.71428571 0.72222222 0.64864865
|
|
0.75675676 0.79069767 0.65116279 0.73684211]
|
|
|
|
mean value: 0.71037649613861
|
|
|
|
key: train_fscore
|
|
value: [0.73684211 0.76521739 0.74857143 0.74202899 0.75141243 0.75362319
|
|
0.75147929 0.74926254 0.76331361 0.73410405]
|
|
|
|
mean value: 0.7495855010954724
|
|
|
|
key: test_precision
|
|
value: [0.7826087 0.57894737 0.6875 0.625 0.72222222 0.63157895
|
|
0.77777778 0.70833333 0.58333333 0.73684211]
|
|
|
|
mean value: 0.6834143783371472
|
|
|
|
key: train_precision
|
|
value: [0.72 0.74157303 0.71978022 0.72316384 0.71505376 0.73446328
|
|
0.74269006 0.73837209 0.75438596 0.70949721]
|
|
|
|
mean value: 0.7298979458691992
|
|
|
|
key: test_recall
|
|
value: [0.94736842 0.57894737 0.61111111 0.83333333 0.72222222 0.66666667
|
|
0.73684211 0.89473684 0.73684211 0.73684211]
|
|
|
|
mean value: 0.7464912280701754
|
|
|
|
key: train_recall
|
|
value: [0.75449102 0.79041916 0.7797619 0.76190476 0.79166667 0.77380952
|
|
0.76047904 0.76047904 0.77245509 0.76047904]
|
|
|
|
mean value: 0.7705945252352437
|
|
|
|
key: test_roc_auc
|
|
value: [0.84210526 0.57894737 0.67397661 0.67982456 0.72953216 0.64912281
|
|
0.75730994 0.75292398 0.59064327 0.72953216]
|
|
|
|
mean value: 0.6983918128654971
|
|
|
|
key: train_roc_auc
|
|
value: [0.73053892 0.75748503 0.73718634 0.73424579 0.7371507 0.7461862
|
|
0.74928714 0.74631095 0.76122754 0.72547762]
|
|
|
|
mean value: 0.742509623609923
|
|
|
|
key: test_jcc
|
|
value: [0.75 0.40740741 0.47826087 0.55555556 0.56521739 0.48
|
|
0.60869565 0.65384615 0.48275862 0.58333333]
|
|
|
|
mean value: 0.5565074983875584
|
|
|
|
key: train_jcc
|
|
value: [0.58333333 0.61971831 0.59817352 0.58986175 0.60180995 0.60465116
|
|
0.60189573 0.5990566 0.61722488 0.57990868]
|
|
|
|
mean value: 0.5995633922420729
|
|
|
|
MCC on Blind test: 0.4
|
|
|
|
Accuracy on Blind test: 0.71
|
|
|
|
Model_name: XGBoost
|
|
Model func: XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'Z...
|
|
interaction_constraints=None, learning_rate=None,
|
|
max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan,
|
|
monotone_constraints=None, n_estimators=100,
|
|
n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None,
|
|
reg_lambda=None, scale_pos_weight=None,
|
|
subsample=None, tree_method=None,
|
|
use_label_encoder=False,
|
|
validate_parameters=None, verbosity=0))])
|
|
|
|
key: fit_time
|
|
value: [0.12970591 0.08958173 0.11434889 0.1245048 0.08423162 0.12775731
|
|
0.0952704 0.07696056 0.08785892 0.08014584]
|
|
|
|
mean value: 0.10103659629821778
|
|
|
|
key: score_time
|
|
value: [0.01228452 0.01177216 0.01194835 0.01111197 0.01239347 0.01124144
|
|
0.01112938 0.01121688 0.01273608 0.01139426]
|
|
|
|
mean value: 0.011722850799560546
|
|
|
|
key: test_mcc
|
|
value: [0.74620251 0.84327404 0.67849265 0.62807634 0.51319869 0.73099415
|
|
0.4633451 0.73020842 0.62807634 0.7888597 ]
|
|
|
|
mean value: 0.6750727927544885
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.86842105 0.92105263 0.83783784 0.81081081 0.75675676 0.86486486
|
|
0.72972973 0.86486486 0.81081081 0.89189189]
|
|
|
|
mean value: 0.8357041251778095
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.85714286 0.91891892 0.82352941 0.78787879 0.74285714 0.86486486
|
|
0.72222222 0.87179487 0.82926829 0.88888889]
|
|
|
|
mean value: 0.8307366259016187
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.9375 0.94444444 0.875 0.86666667 0.76470588 0.84210526
|
|
0.76470588 0.85 0.77272727 0.94117647]
|
|
|
|
mean value: 0.8559031882290395
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.78947368 0.89473684 0.77777778 0.72222222 0.72222222 0.88888889
|
|
0.68421053 0.89473684 0.89473684 0.84210526]
|
|
|
|
mean value: 0.8111111111111111
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.86842105 0.92105263 0.83625731 0.80847953 0.75584795 0.86549708
|
|
0.73099415 0.86403509 0.80847953 0.89327485]
|
|
|
|
mean value: 0.835233918128655
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.75 0.85 0.7 0.65 0.59090909 0.76190476
|
|
0.56521739 0.77272727 0.70833333 0.8 ]
|
|
|
|
mean value: 0.7149091850178807
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.79
|
|
|
|
Accuracy on Blind test: 0.89
|
|
|
|
Model_name: LDA
|
|
Model func: LinearDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LinearDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.04436898 0.05903172 0.04550195 0.05854368 0.03785062 0.04440618
|
|
0.03837872 0.08264017 0.07626629 0.08594012]
|
|
|
|
mean value: 0.057292842864990236
|
|
|
|
key: score_time
|
|
value: [0.02070236 0.01216078 0.012218 0.01226234 0.0123291 0.0127306
|
|
0.01215196 0.0203948 0.02352381 0.02420211]
|
|
|
|
mean value: 0.01626758575439453
|
|
|
|
key: test_mcc
|
|
value: [0.47368421 0.31622777 0.67849265 0.40469382 0.68035483 0.35087719
|
|
0.30384671 0.56934383 0.35484024 0.56725146]
|
|
|
|
mean value: 0.4699612700739381
|
|
|
|
key: train_mcc
|
|
value: [0.7964643 0.82041812 0.80299412 0.80919788 0.81493495 0.82095883
|
|
0.82089393 0.76752483 0.83283433 0.80310162]
|
|
|
|
mean value: 0.80893229122112
|
|
|
|
key: test_accuracy
|
|
value: [0.73684211 0.65789474 0.83783784 0.7027027 0.83783784 0.67567568
|
|
0.64864865 0.78378378 0.67567568 0.78378378]
|
|
|
|
mean value: 0.7340682788051209
|
|
|
|
key: train_accuracy
|
|
value: [0.89820359 0.91017964 0.90149254 0.90447761 0.90746269 0.91044776
|
|
0.91044776 0.88358209 0.91641791 0.90149254]
|
|
|
|
mean value: 0.9044204129055322
|
|
|
|
key: test_fscore
|
|
value: [0.73684211 0.64864865 0.82352941 0.68571429 0.84210526 0.66666667
|
|
0.62857143 0.8 0.66666667 0.78947368]
|
|
|
|
mean value: 0.7288218160663982
|
|
|
|
key: train_fscore
|
|
value: [0.89880952 0.91071429 0.90207715 0.90361446 0.90801187 0.91017964
|
|
0.91017964 0.88495575 0.91616766 0.90030211]
|
|
|
|
mean value: 0.9045012101250447
|
|
|
|
key: test_precision
|
|
value: [0.73684211 0.66666667 0.875 0.70588235 0.8 0.66666667
|
|
0.6875 0.76190476 0.70588235 0.78947368]
|
|
|
|
mean value: 0.7395818590594132
|
|
|
|
key: train_precision
|
|
value: [0.89349112 0.90532544 0.89940828 0.91463415 0.90532544 0.91566265
|
|
0.91017964 0.87209302 0.91616766 0.90853659]
|
|
|
|
mean value: 0.904082400681275
|
|
|
|
key: test_recall
|
|
value: [0.73684211 0.63157895 0.77777778 0.66666667 0.88888889 0.66666667
|
|
0.57894737 0.84210526 0.63157895 0.78947368]
|
|
|
|
mean value: 0.7210526315789474
|
|
|
|
key: train_recall
|
|
value: [0.90419162 0.91616766 0.9047619 0.89285714 0.91071429 0.9047619
|
|
0.91017964 0.89820359 0.91616766 0.89221557]
|
|
|
|
mean value: 0.9050220986598232
|
|
|
|
key: test_roc_auc
|
|
value: [0.73684211 0.65789474 0.83625731 0.70175439 0.83918129 0.6754386
|
|
0.6505848 0.78216374 0.67690058 0.78362573]
|
|
|
|
mean value: 0.7340643274853801
|
|
|
|
key: train_roc_auc
|
|
value: [0.89820359 0.91017964 0.90148275 0.9045124 0.90745295 0.91046478
|
|
0.91044696 0.88362561 0.91641717 0.90146493]
|
|
|
|
mean value: 0.9044250784145994
|
|
|
|
key: test_jcc
|
|
value: [0.58333333 0.48 0.7 0.52173913 0.72727273 0.5
|
|
0.45833333 0.66666667 0.5 0.65217391]
|
|
|
|
mean value: 0.5789519104084322
|
|
|
|
key: train_jcc
|
|
value: [0.81621622 0.83606557 0.82162162 0.82417582 0.83152174 0.83516484
|
|
0.83516484 0.79365079 0.84530387 0.81868132]
|
|
|
|
mean value: 0.8257566624979686
|
|
|
|
MCC on Blind test: 0.52
|
|
|
|
Accuracy on Blind test: 0.76
|
|
|
|
Model_name: Multinomial
|
|
Model func: MultinomialNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MultinomialNB())])
|
|
|
|
key: fit_time
|
|
value: [0.02459884 0.01037455 0.00942564 0.00927401 0.00964308 0.0094378
|
|
0.00930643 0.009269 0.00950551 0.00938058]
|
|
|
|
mean value: 0.0110215425491333
|
|
|
|
key: score_time
|
|
value: [0.01136661 0.00889158 0.00862312 0.00862908 0.00901937 0.00859523
|
|
0.00862193 0.00873423 0.00863767 0.0087359 ]
|
|
|
|
mean value: 0.008985471725463868
|
|
|
|
key: test_mcc
|
|
value: [0.48454371 0.21081851 0.29618896 0.47328975 0.58342636 0.35484024
|
|
0.24269006 0.35558302 0.24975622 0.35558302]
|
|
|
|
mean value: 0.3606719850841716
|
|
|
|
key: train_mcc
|
|
value: [0.4093119 0.41044367 0.44176362 0.40692678 0.40626794 0.46651574
|
|
0.43081449 0.37763411 0.39611822 0.42953016]
|
|
|
|
mean value: 0.41753266340858863
|
|
|
|
key: test_accuracy
|
|
value: [0.73684211 0.60526316 0.64864865 0.72972973 0.78378378 0.67567568
|
|
0.62162162 0.67567568 0.62162162 0.67567568]
|
|
|
|
mean value: 0.6774537695590327
|
|
|
|
key: train_accuracy
|
|
value: [0.70359281 0.70359281 0.71940299 0.70149254 0.70149254 0.73134328
|
|
0.71343284 0.68656716 0.69552239 0.71343284]
|
|
|
|
mean value: 0.7069872195906695
|
|
|
|
key: test_fscore
|
|
value: [0.76190476 0.61538462 0.62857143 0.75 0.8 0.68421053
|
|
0.63157895 0.71428571 0.68181818 0.71428571]
|
|
|
|
mean value: 0.6982039889934627
|
|
|
|
key: train_fscore
|
|
value: [0.71794872 0.72112676 0.73595506 0.72222222 0.72067039 0.74860335
|
|
0.73033708 0.70752089 0.71666667 0.72727273]
|
|
|
|
mean value: 0.7248323863886837
|
|
|
|
key: test_precision
|
|
value: [0.69565217 0.6 0.64705882 0.68181818 0.72727273 0.65
|
|
0.63157895 0.65217391 0.6 0.65217391]
|
|
|
|
mean value: 0.6537728679988741
|
|
|
|
key: train_precision
|
|
value: [0.68478261 0.68085106 0.69680851 0.67708333 0.67894737 0.70526316
|
|
0.68783069 0.66145833 0.66839378 0.69189189]
|
|
|
|
mean value: 0.6833310738252193
|
|
|
|
key: test_recall
|
|
value: [0.84210526 0.63157895 0.61111111 0.83333333 0.88888889 0.72222222
|
|
0.63157895 0.78947368 0.78947368 0.78947368]
|
|
|
|
mean value: 0.7529239766081871
|
|
|
|
key: train_recall
|
|
value: [0.75449102 0.76646707 0.7797619 0.77380952 0.76785714 0.79761905
|
|
0.77844311 0.76047904 0.77245509 0.76646707]
|
|
|
|
mean value: 0.77178500142572
|
|
|
|
key: test_roc_auc
|
|
value: [0.73684211 0.60526316 0.64766082 0.73245614 0.78654971 0.67690058
|
|
0.62134503 0.67251462 0.61695906 0.67251462]
|
|
|
|
mean value: 0.6769005847953217
|
|
|
|
key: train_roc_auc
|
|
value: [0.70359281 0.70359281 0.71922227 0.70127602 0.70129384 0.73114485
|
|
0.71362632 0.68678714 0.69575135 0.71359068]
|
|
|
|
mean value: 0.7069878100940975
|
|
|
|
key: test_jcc
|
|
value: [0.61538462 0.44444444 0.45833333 0.6 0.66666667 0.52
|
|
0.46153846 0.55555556 0.51724138 0.55555556]
|
|
|
|
mean value: 0.5394720011788977
|
|
|
|
key: train_jcc
|
|
value: [0.56 0.56387665 0.58222222 0.56521739 0.56331878 0.59821429
|
|
0.57522124 0.54741379 0.55844156 0.57142857]
|
|
|
|
mean value: 0.5685354490427442
|
|
|
|
MCC on Blind test: 0.4
|
|
|
|
Accuracy on Blind test: 0.71
|
|
|
|
Model_name: Passive Aggresive
|
|
Model func: PassiveAggressiveClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
PassiveAggressiveClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01294255 0.01785254 0.01440525 0.01589322 0.01523256 0.01849508
|
|
0.01868582 0.01692724 0.01855922 0.01615882]
|
|
|
|
mean value: 0.016515231132507323
|
|
|
|
key: score_time
|
|
value: [0.00858951 0.01109195 0.01098466 0.01165199 0.01162195 0.01164508
|
|
0.01162982 0.01159692 0.01163578 0.01163483]
|
|
|
|
mean value: 0.011208248138427735
|
|
|
|
key: test_mcc
|
|
value: [0.47519096 0.45291081 0.57857577 0.56725146 0.40826373 0.46019501
|
|
0.30518397 0.40611643 0.35821893 0.62807634]
|
|
|
|
mean value: 0.46399834120615263
|
|
|
|
key: train_mcc
|
|
value: [0.45719953 0.59862194 0.62319098 0.6537872 0.47409383 0.68260397
|
|
0.34054695 0.56861082 0.59545476 0.59643094]
|
|
|
|
mean value: 0.5590540907121603
|
|
|
|
key: test_accuracy
|
|
value: [0.68421053 0.71052632 0.78378378 0.78378378 0.67567568 0.72972973
|
|
0.59459459 0.64864865 0.64864865 0.81081081]
|
|
|
|
mean value: 0.7070412517780938
|
|
|
|
key: train_accuracy
|
|
value: [0.67664671 0.78443114 0.8 0.82686567 0.68656716 0.83880597
|
|
0.60298507 0.75223881 0.7641791 0.7761194 ]
|
|
|
|
mean value: 0.7508839038341228
|
|
|
|
key: test_fscore
|
|
value: [0.53846154 0.75555556 0.75 0.77777778 0.53846154 0.70588235
|
|
0.71698113 0.74509804 0.73469388 0.82926829]
|
|
|
|
mean value: 0.7092180104722692
|
|
|
|
key: train_fscore
|
|
value: [0.52631579 0.8134715 0.76975945 0.82634731 0.54935622 0.82911392
|
|
0.71520343 0.79805353 0.80778589 0.81108312]
|
|
|
|
mean value: 0.7446490160460283
|
|
|
|
key: test_precision
|
|
value: [1. 0.65384615 0.85714286 0.77777778 0.875 0.75
|
|
0.55882353 0.59375 0.6 0.77272727]
|
|
|
|
mean value: 0.7439067590905826
|
|
|
|
key: train_precision
|
|
value: [0.98360656 0.71689498 0.91056911 0.8313253 0.98461538 0.88513514
|
|
0.55666667 0.67213115 0.68032787 0.7 ]
|
|
|
|
mean value: 0.7921272144252504
|
|
|
|
key: test_recall
|
|
value: [0.36842105 0.89473684 0.66666667 0.77777778 0.38888889 0.66666667
|
|
1. 1. 0.94736842 0.89473684]
|
|
|
|
mean value: 0.7605263157894737
|
|
|
|
key: train_recall
|
|
value: [0.35928144 0.94011976 0.66666667 0.82142857 0.38095238 0.7797619
|
|
1. 0.98203593 0.99401198 0.96407186]
|
|
|
|
mean value: 0.7888330481893356
|
|
|
|
key: test_roc_auc
|
|
value: [0.68421053 0.71052632 0.78070175 0.78362573 0.66812865 0.72807018
|
|
0.58333333 0.63888889 0.64035088 0.80847953]
|
|
|
|
mean value: 0.7026315789473684
|
|
|
|
key: train_roc_auc
|
|
value: [0.67664671 0.78443114 0.8003992 0.82688195 0.68748218 0.83898275
|
|
0.60416667 0.75292273 0.76486313 0.77667879]
|
|
|
|
mean value: 0.7513455232392358
|
|
|
|
key: test_jcc
|
|
value: [0.36842105 0.60714286 0.6 0.63636364 0.36842105 0.54545455
|
|
0.55882353 0.59375 0.58064516 0.70833333]
|
|
|
|
mean value: 0.5567355168259618
|
|
|
|
key: train_jcc
|
|
value: [0.35714286 0.68558952 0.62569832 0.70408163 0.37869822 0.70810811
|
|
0.55666667 0.66396761 0.67755102 0.68220339]
|
|
|
|
mean value: 0.6039707354670469
|
|
|
|
MCC on Blind test: 0.38
|
|
|
|
Accuracy on Blind test: 0.67
|
|
|
|
Model_name: Stochastic GDescent
|
|
Model func: SGDClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SGDClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02310205 0.01856565 0.01713705 0.02080774 0.01771021 0.0191052
|
|
0.02140164 0.01983404 0.01634526 0.02297759]
|
|
|
|
mean value: 0.01969864368438721
|
|
|
|
key: score_time
|
|
value: [0.01175785 0.01174521 0.01170135 0.01181269 0.01168871 0.01173592
|
|
0.01177812 0.01172948 0.0117929 0.01177216]
|
|
|
|
mean value: 0.011751437187194824
|
|
|
|
key: test_mcc
|
|
value: [0.68803296 0.32732684 0.42489158 0.41299552 0.38474188 0.45644817
|
|
0.24975622 0.54996161 0.38173594 0.56934383]
|
|
|
|
mean value: 0.44452345572320945
|
|
|
|
key: train_mcc
|
|
value: [0.71385582 0.64962264 0.70072654 0.65169774 0.35341838 0.59052377
|
|
0.60078683 0.69292684 0.67512991 0.74214874]
|
|
|
|
mean value: 0.6370837200225458
|
|
|
|
key: test_accuracy
|
|
value: [0.84210526 0.65789474 0.7027027 0.7027027 0.62162162 0.7027027
|
|
0.62162162 0.75675676 0.67567568 0.78378378]
|
|
|
|
mean value: 0.7067567567567568
|
|
|
|
key: train_accuracy
|
|
value: [0.85628743 0.81437126 0.84477612 0.81791045 0.6119403 0.75820896
|
|
0.77014925 0.8358209 0.8238806 0.85970149]
|
|
|
|
mean value: 0.7993046742336223
|
|
|
|
key: test_fscore
|
|
value: [0.83333333 0.60606061 0.73170732 0.71794872 0.72 0.59259259
|
|
0.68181818 0.8 0.73913043 0.8 ]
|
|
|
|
mean value: 0.7222591183609212
|
|
|
|
key: train_fscore
|
|
value: [0.85185185 0.78767123 0.8579235 0.83646113 0.72103004 0.68235294
|
|
0.81081081 0.85333333 0.84514436 0.87466667]
|
|
|
|
mean value: 0.8121245859862802
|
|
|
|
key: test_precision
|
|
value: [0.88235294 0.71428571 0.65217391 0.66666667 0.5625 0.88888889
|
|
0.6 0.69230769 0.62962963 0.76190476]
|
|
|
|
mean value: 0.7050710207903302
|
|
|
|
key: train_precision
|
|
value: [0.87898089 0.92 0.79292929 0.76097561 0.56375839 1.
|
|
0.6875 0.76923077 0.75233645 0.78846154]
|
|
|
|
mean value: 0.7914172939957319
|
|
|
|
key: test_recall
|
|
value: [0.78947368 0.52631579 0.83333333 0.77777778 1. 0.44444444
|
|
0.78947368 0.94736842 0.89473684 0.84210526]
|
|
|
|
mean value: 0.7845029239766081
|
|
|
|
key: train_recall
|
|
value: [0.82634731 0.68862275 0.93452381 0.92857143 1. 0.51785714
|
|
0.98802395 0.95808383 0.96407186 0.98203593]
|
|
|
|
mean value: 0.8788138009694896
|
|
|
|
key: test_roc_auc
|
|
value: [0.84210526 0.65789474 0.70614035 0.70467836 0.63157895 0.69590643
|
|
0.61695906 0.75146199 0.66959064 0.78216374]
|
|
|
|
mean value: 0.7058479532163743
|
|
|
|
key: train_roc_auc
|
|
value: [0.85628743 0.81437126 0.84450741 0.81757913 0.61077844 0.75892857
|
|
0.77079769 0.83618477 0.82429783 0.86006558]
|
|
|
|
mean value: 0.7993798118049615
|
|
|
|
key: test_jcc
|
|
value: [0.71428571 0.43478261 0.57692308 0.56 0.5625 0.42105263
|
|
0.51724138 0.66666667 0.5862069 0.66666667]
|
|
|
|
mean value: 0.5706325640678793
|
|
|
|
key: train_jcc
|
|
value: [0.74193548 0.64971751 0.75119617 0.71889401 0.56375839 0.51785714
|
|
0.68181818 0.74418605 0.73181818 0.77725118]
|
|
|
|
mean value: 0.6878432306561658
|
|
|
|
MCC on Blind test: 0.56
|
|
|
|
Accuracy on Blind test: 0.78
|
|
|
|
Model_name: AdaBoost Classifier
|
|
Model func: AdaBoostClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', AdaBoostClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.16630363 0.14846015 0.14814687 0.1504178 0.14558625 0.15073895
|
|
0.15437222 0.1469028 0.14646888 0.14754939]
|
|
|
|
mean value: 0.15049469470977783
|
|
|
|
key: score_time
|
|
value: [0.01658773 0.01535249 0.01540661 0.0155499 0.01619935 0.01665998
|
|
0.01664519 0.0156033 0.01539564 0.01603627]
|
|
|
|
mean value: 0.01594364643096924
|
|
|
|
key: test_mcc
|
|
value: [0.38829014 0.63245553 0.56725146 0.51319869 0.40780312 0.35484024
|
|
0.51319869 0.56934383 0.45906433 0.58342636]
|
|
|
|
mean value: 0.49888723795886447
|
|
|
|
key: train_mcc
|
|
value: [0.95810101 0.95810101 0.95822492 0.95822492 0.95836009 0.95223838
|
|
0.92240242 0.92240519 0.97016575 0.9582264 ]
|
|
|
|
mean value: 0.9516450089400443
|
|
|
|
key: test_accuracy
|
|
value: [0.68421053 0.81578947 0.78378378 0.75675676 0.7027027 0.67567568
|
|
0.75675676 0.78378378 0.72972973 0.78378378]
|
|
|
|
mean value: 0.7472972972972973
|
|
|
|
key: train_accuracy
|
|
value: [0.97904192 0.97904192 0.97910448 0.97910448 0.97910448 0.9761194
|
|
0.96119403 0.96119403 0.98507463 0.97910448]
|
|
|
|
mean value: 0.9758083832335329
|
|
|
|
key: test_fscore
|
|
value: [0.625 0.81081081 0.77777778 0.74285714 0.66666667 0.68421053
|
|
0.76923077 0.8 0.73684211 0.76470588]
|
|
|
|
mean value: 0.7378101681275056
|
|
|
|
key: train_fscore
|
|
value: [0.97910448 0.97910448 0.97922849 0.97922849 0.97935103 0.97619048
|
|
0.96096096 0.96119403 0.98498498 0.97910448]
|
|
|
|
mean value: 0.9758451890565136
|
|
|
|
key: test_precision
|
|
value: [0.76923077 0.83333333 0.77777778 0.76470588 0.73333333 0.65
|
|
0.75 0.76190476 0.73684211 0.86666667]
|
|
|
|
mean value: 0.7643794629862741
|
|
|
|
key: train_precision
|
|
value: [0.97619048 0.97619048 0.97633136 0.97633136 0.97076023 0.97619048
|
|
0.96385542 0.95833333 0.98795181 0.97619048]
|
|
|
|
mean value: 0.973832542282252
|
|
|
|
key: test_recall
|
|
value: [0.52631579 0.78947368 0.77777778 0.72222222 0.61111111 0.72222222
|
|
0.78947368 0.84210526 0.73684211 0.68421053]
|
|
|
|
mean value: 0.7201754385964912
|
|
|
|
key: train_recall
|
|
value: [0.98203593 0.98203593 0.98214286 0.98214286 0.98809524 0.97619048
|
|
0.95808383 0.96407186 0.98203593 0.98203593]
|
|
|
|
mean value: 0.9778870829769033
|
|
|
|
key: test_roc_auc
|
|
value: [0.68421053 0.81578947 0.78362573 0.75584795 0.7002924 0.67690058
|
|
0.75584795 0.78216374 0.72953216 0.78654971]
|
|
|
|
mean value: 0.7470760233918128
|
|
|
|
key: train_roc_auc
|
|
value: [0.97904192 0.97904192 0.97909538 0.97909538 0.97907756 0.97611919
|
|
0.96118477 0.96120259 0.98506558 0.9791132 ]
|
|
|
|
mean value: 0.9758037496435701
|
|
|
|
key: test_jcc
|
|
value: [0.45454545 0.68181818 0.63636364 0.59090909 0.5 0.52
|
|
0.625 0.66666667 0.58333333 0.61904762]
|
|
|
|
mean value: 0.5877683982683982
|
|
|
|
key: train_jcc
|
|
value: [0.95906433 0.95906433 0.95930233 0.95930233 0.95953757 0.95348837
|
|
0.92485549 0.92528736 0.9704142 0.95906433]
|
|
|
|
mean value: 0.9529380626801041
|
|
|
|
MCC on Blind test: 0.65
|
|
|
|
Accuracy on Blind test: 0.82
|
|
|
|
Model_name: Bagging Classifier
|
|
Model func: BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
BaggingClassifier(n_jobs=10, oob_score=True,
|
|
random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.06599069 0.06799698 0.06891441 0.0592823 0.04597569 0.09300995
|
|
0.08456278 0.07400322 0.07236242 0.08309746]
|
|
|
|
mean value: 0.0715195894241333
|
|
|
|
key: score_time
|
|
value: [0.02218223 0.02220726 0.03101325 0.02146029 0.01967931 0.03042531
|
|
0.02586341 0.02647734 0.03936148 0.03168368]
|
|
|
|
mean value: 0.02703535556793213
|
|
|
|
key: test_mcc
|
|
value: [0.74620251 0.59222009 0.51319869 0.57857577 0.56934383 0.6754386
|
|
0.45906433 0.69356297 0.69007214 0.75938069]
|
|
|
|
mean value: 0.627705961039184
|
|
|
|
key: train_mcc
|
|
value: [0.94694002 0.9589266 0.95836453 0.95836453 0.93475011 0.97030621
|
|
0.97016575 0.95250666 0.98210658 0.96479428]
|
|
|
|
mean value: 0.9597225265991568
|
|
|
|
key: test_accuracy
|
|
value: [0.86842105 0.78947368 0.75675676 0.78378378 0.78378378 0.83783784
|
|
0.72972973 0.83783784 0.83783784 0.86486486]
|
|
|
|
mean value: 0.8090327169274538
|
|
|
|
key: train_accuracy
|
|
value: [0.97305389 0.97904192 0.97910448 0.97910448 0.96716418 0.98507463
|
|
0.98507463 0.9761194 0.99104478 0.98208955]
|
|
|
|
mean value: 0.9796871927786218
|
|
|
|
key: test_fscore
|
|
value: [0.85714286 0.76470588 0.74285714 0.75 0.76470588 0.83333333
|
|
0.73684211 0.82352941 0.85714286 0.84848485]
|
|
|
|
mean value: 0.7978744320694785
|
|
|
|
key: train_fscore
|
|
value: [0.97247706 0.97859327 0.97897898 0.97897898 0.96676737 0.98498498
|
|
0.98498498 0.97575758 0.99099099 0.98170732]
|
|
|
|
mean value: 0.9794221519742311
|
|
|
|
key: test_precision
|
|
value: [0.9375 0.86666667 0.76470588 0.85714286 0.8125 0.83333333
|
|
0.73684211 0.93333333 0.7826087 1. ]
|
|
|
|
mean value: 0.8524632873744463
|
|
|
|
key: train_precision
|
|
value: [0.99375 1. 0.98787879 0.98787879 0.98159509 0.99393939
|
|
0.98795181 0.98773006 0.9939759 1. ]
|
|
|
|
mean value: 0.9914699833914576
|
|
|
|
key: test_recall
|
|
value: [0.78947368 0.68421053 0.72222222 0.66666667 0.72222222 0.83333333
|
|
0.73684211 0.73684211 0.94736842 0.73684211]
|
|
|
|
mean value: 0.7576023391812865
|
|
|
|
key: train_recall
|
|
value: [0.95209581 0.95808383 0.9702381 0.9702381 0.95238095 0.97619048
|
|
0.98203593 0.96407186 0.98802395 0.96407186]
|
|
|
|
mean value: 0.9677430852580553
|
|
|
|
key: test_roc_auc
|
|
value: [0.86842105 0.78947368 0.75584795 0.78070175 0.78216374 0.8377193
|
|
0.72953216 0.84064327 0.83479532 0.86842105]
|
|
|
|
mean value: 0.8087719298245614
|
|
|
|
key: train_roc_auc
|
|
value: [0.97305389 0.97904192 0.97913102 0.97913102 0.96720844 0.98510123
|
|
0.98506558 0.97608355 0.99103579 0.98203593]
|
|
|
|
mean value: 0.9796888366124894
|
|
|
|
key: test_jcc
|
|
value: [0.75 0.61904762 0.59090909 0.6 0.61904762 0.71428571
|
|
0.58333333 0.7 0.75 0.73684211]
|
|
|
|
mean value: 0.6663465481886535
|
|
|
|
key: train_jcc
|
|
value: [0.94642857 0.95808383 0.95882353 0.95882353 0.93567251 0.9704142
|
|
0.9704142 0.95266272 0.98214286 0.96407186]
|
|
|
|
mean value: 0.9597537814897951
|
|
|
|
MCC on Blind test: 0.64
|
|
|
|
Accuracy on Blind test: 0.82
|
|
|
|
Model_name: Gaussian Process
|
|
Model func: GaussianProcessClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianProcessClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.07527375 0.12890768 0.12948847 0.11022401 0.09028745 0.09113503
|
|
0.11805844 0.12561107 0.09590125 0.09975028]
|
|
|
|
mean value: 0.10646374225616455
|
|
|
|
key: score_time
|
|
value: [0.02161479 0.03995824 0.02960944 0.02846646 0.02835655 0.02844834
|
|
0.02764821 0.02710414 0.02579927 0.02790666]
|
|
|
|
mean value: 0.0284912109375
|
|
|
|
key: test_mcc
|
|
value: [0.42163702 0.31980107 0.40780312 0.48981224 0.46019501 0.40643275
|
|
0.24633537 0.40780312 0.24408665 0.62280702]
|
|
|
|
mean value: 0.40267133743246863
|
|
|
|
key: train_mcc
|
|
value: [0.9760479 0.97021644 0.98210721 0.97016575 0.97611919 0.97618794
|
|
0.98210721 0.97618794 0.97016575 0.97611919]
|
|
|
|
mean value: 0.9755424518064258
|
|
|
|
key: test_accuracy
|
|
value: [0.71052632 0.65789474 0.7027027 0.72972973 0.72972973 0.7027027
|
|
0.62162162 0.7027027 0.62162162 0.81081081]
|
|
|
|
mean value: 0.6990042674253201
|
|
|
|
key: train_accuracy
|
|
value: [0.98802395 0.98502994 0.99104478 0.98507463 0.9880597 0.9880597
|
|
0.99104478 0.9880597 0.98507463 0.9880597 ]
|
|
|
|
mean value: 0.9877531504155868
|
|
|
|
key: test_fscore
|
|
value: [0.7027027 0.62857143 0.66666667 0.76190476 0.70588235 0.7027027
|
|
0.61111111 0.73170732 0.66666667 0.81081081]
|
|
|
|
mean value: 0.6988726521151198
|
|
|
|
key: train_fscore
|
|
value: [0.98802395 0.98489426 0.99104478 0.9851632 0.98809524 0.98816568
|
|
0.99104478 0.98795181 0.98498498 0.98802395]
|
|
|
|
mean value: 0.987739263177944
|
|
|
|
key: test_precision
|
|
value: [0.72222222 0.6875 0.73333333 0.66666667 0.75 0.68421053
|
|
0.64705882 0.68181818 0.60869565 0.83333333]
|
|
|
|
mean value: 0.7014838739392851
|
|
|
|
key: train_precision
|
|
value: [0.98802395 0.99390244 0.99401198 0.98224852 0.98809524 0.98235294
|
|
0.98809524 0.99393939 0.98795181 0.98802395]
|
|
|
|
mean value: 0.9886645458509227
|
|
|
|
key: test_recall
|
|
value: [0.68421053 0.57894737 0.61111111 0.88888889 0.66666667 0.72222222
|
|
0.57894737 0.78947368 0.73684211 0.78947368]
|
|
|
|
mean value: 0.7046783625730995
|
|
|
|
key: train_recall
|
|
value: [0.98802395 0.9760479 0.98809524 0.98809524 0.98809524 0.99404762
|
|
0.99401198 0.98203593 0.98203593 0.98802395]
|
|
|
|
mean value: 0.9868512974051896
|
|
|
|
key: test_roc_auc
|
|
value: [0.71052632 0.65789474 0.7002924 0.73391813 0.72807018 0.70321637
|
|
0.62280702 0.7002924 0.61842105 0.81140351]
|
|
|
|
mean value: 0.6986842105263158
|
|
|
|
key: train_roc_auc
|
|
value: [0.98802395 0.98502994 0.99105361 0.98506558 0.9880596 0.98804177
|
|
0.99105361 0.98804177 0.98506558 0.9880596 ]
|
|
|
|
mean value: 0.987749500998004
|
|
|
|
key: test_jcc
|
|
value: [0.54166667 0.45833333 0.5 0.61538462 0.54545455 0.54166667
|
|
0.44 0.57692308 0.5 0.68181818]
|
|
|
|
mean value: 0.5401247086247086
|
|
|
|
key: train_jcc
|
|
value: [0.97633136 0.9702381 0.98224852 0.97076023 0.97647059 0.97660819
|
|
0.98224852 0.97619048 0.9704142 0.97633136]
|
|
|
|
mean value: 0.9757841545213538
|
|
|
|
MCC on Blind test: 0.31
|
|
|
|
Accuracy on Blind test: 0.66
|
|
|
|
Model_name: Gradient Boosting
|
|
Model func: GradientBoostingClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GradientBoostingClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.54426956 0.54028654 0.54029012 0.5366323 0.53663683 0.54450583
|
|
0.53248 0.54761362 0.55458283 0.5454073 ]
|
|
|
|
mean value: 0.5422704935073852
|
|
|
|
key: score_time
|
|
value: [0.00960541 0.00932002 0.00929761 0.00963545 0.0094142 0.00931263
|
|
0.00924611 0.00945902 0.00950003 0.00955081]
|
|
|
|
mean value: 0.009434127807617187
|
|
|
|
key: test_mcc
|
|
value: [0.74620251 0.74620251 0.62280702 0.73821295 0.62280702 0.73099415
|
|
0.45906433 0.89736456 0.56934383 0.80369958]
|
|
|
|
mean value: 0.6936698446792978
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.86842105 0.86842105 0.81081081 0.86486486 0.81081081 0.86486486
|
|
0.72972973 0.94594595 0.78378378 0.89189189]
|
|
|
|
mean value: 0.8439544807965861
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.85714286 0.85714286 0.81081081 0.84848485 0.81081081 0.86486486
|
|
0.73684211 0.94444444 0.8 0.88235294]
|
|
|
|
mean value: 0.8412896540141123
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.9375 0.9375 0.78947368 0.93333333 0.78947368 0.84210526
|
|
0.73684211 1. 0.76190476 1. ]
|
|
|
|
mean value: 0.87281328320802
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.78947368 0.78947368 0.83333333 0.77777778 0.83333333 0.88888889
|
|
0.73684211 0.89473684 0.84210526 0.78947368]
|
|
|
|
mean value: 0.8175438596491228
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.86842105 0.86842105 0.81140351 0.8625731 0.81140351 0.86549708
|
|
0.72953216 0.94736842 0.78216374 0.89473684]
|
|
|
|
mean value: 0.8441520467836258
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.75 0.75 0.68181818 0.73684211 0.68181818 0.76190476
|
|
0.58333333 0.89473684 0.66666667 0.78947368]
|
|
|
|
mean value: 0.7296593757120072
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.79
|
|
|
|
Accuracy on Blind test: 0.89
|
|
|
|
Model_name: QDA
|
|
Model func: QuadraticDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', QuadraticDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.02594852 0.02771282 0.02660966 0.02726889 0.02665496 0.02608347
|
|
0.02618623 0.02622151 0.0268333 0.02669811]
|
|
|
|
mean value: 0.026621747016906738
|
|
|
|
key: score_time
|
|
value: [0.01252174 0.01249051 0.02194047 0.02166414 0.01521969 0.01518083
|
|
0.01580071 0.01532936 0.01531005 0.01541018]
|
|
|
|
mean value: 0.016086769104003907
|
|
|
|
key: test_mcc
|
|
value: [ 0.15789474 0.4061812 0.6754386 0.37654316 0.46019501 0.40643275
|
|
0.10530647 0.14287993 -0.02932564 0.26327408]
|
|
|
|
mean value: 0.29648202808316054
|
|
|
|
key: train_mcc
|
|
value: [0.92349775 0.97049496 0.92516134 0.92280545 0.8429841 0.91747584
|
|
0.80829691 0.80878922 0.92894622 0.91409778]
|
|
|
|
mean value: 0.89625495666898
|
|
|
|
key: test_accuracy
|
|
value: [0.57894737 0.68421053 0.83783784 0.67567568 0.72972973 0.7027027
|
|
0.54054054 0.56756757 0.48648649 0.62162162]
|
|
|
|
mean value: 0.6425320056899004
|
|
|
|
key: train_accuracy
|
|
value: [0.96107784 0.98502994 0.96119403 0.96119403 0.91641791 0.95820896
|
|
0.89850746 0.89552239 0.9641791 0.95522388]
|
|
|
|
mean value: 0.9456555545625167
|
|
|
|
key: test_fscore
|
|
value: [0.57894737 0.6 0.83333333 0.71428571 0.70588235 0.7027027
|
|
0.4137931 0.52941176 0.51282051 0.5625 ]
|
|
|
|
mean value: 0.615367685265865
|
|
|
|
key: train_fscore
|
|
value: [0.96 0.98480243 0.96275072 0.96072508 0.92265193 0.95731707
|
|
0.88888889 0.88294314 0.96341463 0.95297806]
|
|
|
|
mean value: 0.9436471953618683
|
|
|
|
key: test_precision
|
|
value: [0.57894737 0.81818182 0.83333333 0.625 0.75 0.68421053
|
|
0.6 0.6 0.5 0.69230769]
|
|
|
|
mean value: 0.6681980738559686
|
|
|
|
key: train_precision
|
|
value: [0.98734177 1. 0.9281768 0.97546012 0.86082474 0.98125
|
|
0.97841727 1. 0.98136646 1. ]
|
|
|
|
mean value: 0.9692837158513816
|
|
|
|
key: test_recall
|
|
value: [0.57894737 0.47368421 0.83333333 0.83333333 0.66666667 0.72222222
|
|
0.31578947 0.47368421 0.52631579 0.47368421]
|
|
|
|
mean value: 0.589766081871345
|
|
|
|
key: train_recall
|
|
value: [0.93413174 0.97005988 1. 0.94642857 0.99404762 0.93452381
|
|
0.81437126 0.79041916 0.94610778 0.91017964]
|
|
|
|
mean value: 0.9240269461077845
|
|
|
|
key: test_roc_auc
|
|
value: [0.57894737 0.68421053 0.8377193 0.67982456 0.72807018 0.70321637
|
|
0.54678363 0.57017544 0.48538012 0.62573099]
|
|
|
|
mean value: 0.6440058479532164
|
|
|
|
key: train_roc_auc
|
|
value: [0.96107784 0.98502994 0.96107784 0.96123824 0.91618549 0.95827987
|
|
0.89825706 0.89520958 0.96412532 0.95508982]
|
|
|
|
mean value: 0.9455571000855432
|
|
|
|
key: test_jcc
|
|
value: [0.40740741 0.42857143 0.71428571 0.55555556 0.54545455 0.54166667
|
|
0.26086957 0.36 0.34482759 0.39130435]
|
|
|
|
mean value: 0.4549942817191693
|
|
|
|
key: train_jcc
|
|
value: [0.92307692 0.97005988 0.9281768 0.9244186 0.85641026 0.91812865
|
|
0.8 0.79041916 0.92941176 0.91017964]
|
|
|
|
mean value: 0.8950281682029826
|
|
|
|
MCC on Blind test: 0.15
|
|
|
|
Accuracy on Blind test: 0.56
|
|
|
|
Model_name: Ridge Classifier
|
|
Model func: RidgeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02368689 0.03681374 0.03670192 0.03689241 0.03667092 0.03666615
|
|
0.03516221 0.03657341 0.03415823 0.03699064]
|
|
|
|
mean value: 0.03503165245056152
|
|
|
|
key: score_time
|
|
value: [0.0209415 0.02243066 0.02019882 0.02174187 0.0229249 0.02086854
|
|
0.02042031 0.02179146 0.02347565 0.02079344]
|
|
|
|
mean value: 0.021558713912963868
|
|
|
|
key: test_mcc
|
|
value: [0.68803296 0.36842105 0.51461988 0.46019501 0.73099415 0.51461988
|
|
0.47328975 0.51793973 0.45906433 0.62280702]
|
|
|
|
mean value: 0.5349983770122155
|
|
|
|
key: train_mcc
|
|
value: [0.73055202 0.77267677 0.70774947 0.74328997 0.750072 0.76729762
|
|
0.76142604 0.74948308 0.77911246 0.75532979]
|
|
|
|
mean value: 0.7516989205173386
|
|
|
|
key: test_accuracy
|
|
value: [0.84210526 0.68421053 0.75675676 0.72972973 0.86486486 0.75675676
|
|
0.72972973 0.75675676 0.72972973 0.81081081]
|
|
|
|
mean value: 0.7661450924608819
|
|
|
|
key: train_accuracy
|
|
value: [0.86526946 0.88622754 0.85373134 0.87164179 0.87462687 0.88358209
|
|
0.88059701 0.87462687 0.88955224 0.87761194]
|
|
|
|
mean value: 0.8757467155241755
|
|
|
|
key: test_fscore
|
|
value: [0.85 0.68421053 0.75675676 0.70588235 0.86486486 0.75675676
|
|
0.70588235 0.7804878 0.73684211 0.81081081]
|
|
|
|
mean value: 0.7652494331528539
|
|
|
|
key: train_fscore
|
|
value: /home/tanu/git/LSHTM_analysis/scripts/ml/./pnca_cd_8020.py:196: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
rouC_CT.sort_values(by = ['test_mcc'], ascending = False, inplace = True)
|
|
/home/tanu/git/LSHTM_analysis/scripts/ml/./pnca_cd_8020.py:199: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
rouC_BT.sort_values(by = ['bts_mcc'], ascending = False, inplace = True)
|
|
[0.86567164 0.88757396 0.85630499 0.87240356 0.87790698 0.88288288
|
|
0.8816568 0.87573964 0.88888889 0.87613293]
|
|
|
|
mean value: 0.8765162281189884
|
|
|
|
key: test_precision
|
|
value: [0.80952381 0.68421053 0.73684211 0.75 0.84210526 0.73684211
|
|
0.8 0.72727273 0.73684211 0.83333333]
|
|
|
|
mean value: 0.7656971975393028
|
|
|
|
key: train_precision
|
|
value: [0.86309524 0.87719298 0.84393064 0.86982249 0.85795455 0.89090909
|
|
0.87134503 0.86549708 0.89156627 0.88414634]
|
|
|
|
mean value: 0.8715459689747079
|
|
|
|
key: test_recall
|
|
value: [0.89473684 0.68421053 0.77777778 0.66666667 0.88888889 0.77777778
|
|
0.63157895 0.84210526 0.73684211 0.78947368]
|
|
|
|
mean value: 0.7690058479532164
|
|
|
|
key: train_recall
|
|
value: [0.86826347 0.89820359 0.86904762 0.875 0.89880952 0.875
|
|
0.89221557 0.88622754 0.88622754 0.86826347]
|
|
|
|
mean value: 0.8817258340461933
|
|
|
|
key: test_roc_auc
|
|
value: [0.84210526 0.68421053 0.75730994 0.72807018 0.86549708 0.75730994
|
|
0.73245614 0.75438596 0.72953216 0.81140351]
|
|
|
|
mean value: 0.7662280701754386
|
|
|
|
key: train_roc_auc
|
|
value: [0.86526946 0.88622754 0.85368549 0.87163174 0.87455446 0.88360778
|
|
0.88063159 0.87466139 0.88954234 0.87758412]
|
|
|
|
mean value: 0.8757395922440833
|
|
|
|
key: test_jcc
|
|
value: [0.73913043 0.52 0.60869565 0.54545455 0.76190476 0.60869565
|
|
0.54545455 0.64 0.58333333 0.68181818]
|
|
|
|
mean value: 0.6234487107095803
|
|
|
|
key: train_jcc
|
|
value: [0.76315789 0.79787234 0.74871795 0.77368421 0.78238342 0.79032258
|
|
0.78835979 0.77894737 0.8 0.77956989]
|
|
|
|
mean value: 0.7803015443994878
|
|
|
|
MCC on Blind test: 0.5
|
|
|
|
Accuracy on Blind test: 0.75
|
|
|
|
Model_name: Ridge ClassifierCV
|
|
Model func: RidgeClassifierCV(cv=10)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifierCV(cv=10))])
|
|
|
|
key: fit_time
|
|
value: [0.29200125 0.3083663 0.24942446 0.24916959 0.26563001 0.24448633
|
|
0.1965251 0.36134005 0.30924296 0.26959085]
|
|
|
|
mean value: 0.2745776891708374
|
|
|
|
key: score_time
|
|
value: [0.02113938 0.02300549 0.02384067 0.02109218 0.02198529 0.02027202
|
|
0.0121603 0.02535272 0.01261783 0.01244164]
|
|
|
|
mean value: 0.01939074993133545
|
|
|
|
key: test_mcc
|
|
value: [0.68803296 0.42163702 0.51319869 0.46019501 0.56725146 0.51461988
|
|
0.40643275 0.62807634 0.40469382 0.62280702]
|
|
|
|
mean value: 0.5226944953325902
|
|
|
|
key: train_mcc
|
|
value: [0.73055202 0.67708385 0.65442595 0.74328997 0.63590019 0.76729762
|
|
0.64184637 0.65393941 0.65548592 0.62446959]
|
|
|
|
mean value: 0.6784290892310385
|
|
|
|
key: test_accuracy
|
|
value: [0.84210526 0.71052632 0.75675676 0.72972973 0.78378378 0.75675676
|
|
0.7027027 0.81081081 0.7027027 0.81081081]
|
|
|
|
mean value: 0.7606685633001422
|
|
|
|
key: train_accuracy
|
|
value: [0.86526946 0.83832335 0.82686567 0.87164179 0.81791045 0.88358209
|
|
0.82089552 0.82686567 0.82686567 0.8119403 ]
|
|
|
|
mean value: 0.8390159978550362
|
|
|
|
key: test_fscore
|
|
value: [0.85 0.7027027 0.74285714 0.70588235 0.77777778 0.75675676
|
|
0.7027027 0.82926829 0.71794872 0.81081081]
|
|
|
|
mean value: 0.7596707257180715
|
|
|
|
key: train_fscore
|
|
value: [0.86567164 0.84117647 0.83139535 0.87240356 0.820059 0.88288288
|
|
0.82142857 0.82840237 0.83236994 0.81524927]
|
|
|
|
mean value: 0.8411039049331559
|
|
|
|
key: test_precision
|
|
value: [0.80952381 0.72222222 0.76470588 0.75 0.77777778 0.73684211
|
|
0.72222222 0.77272727 0.7 0.83333333]
|
|
|
|
mean value: 0.7589354625422737
|
|
|
|
key: train_precision
|
|
value: [0.86309524 0.8265896 0.8125 0.86982249 0.8128655 0.89090909
|
|
0.81656805 0.81871345 0.80446927 0.79885057]
|
|
|
|
mean value: 0.8314383252748512
|
|
|
|
key: test_recall
|
|
value: [0.89473684 0.68421053 0.72222222 0.66666667 0.77777778 0.77777778
|
|
0.68421053 0.89473684 0.73684211 0.78947368]
|
|
|
|
mean value: 0.7628654970760234
|
|
|
|
key: train_recall
|
|
value: [0.86826347 0.85628743 0.85119048 0.875 0.82738095 0.875
|
|
0.82634731 0.83832335 0.86227545 0.83233533]
|
|
|
|
mean value: 0.851240376390077
|
|
|
|
key: test_roc_auc
|
|
value: [0.84210526 0.71052632 0.75584795 0.72807018 0.78362573 0.75730994
|
|
0.70321637 0.80847953 0.70175439 0.81140351]
|
|
|
|
mean value: 0.760233918128655
|
|
|
|
key: train_roc_auc
|
|
value: [0.86526946 0.83832335 0.82679284 0.87163174 0.81788209 0.88360778
|
|
0.82091175 0.82689977 0.82697106 0.812001 ]
|
|
|
|
mean value: 0.8390290846877674
|
|
|
|
key: test_jcc
|
|
value: [0.73913043 0.54166667 0.59090909 0.54545455 0.63636364 0.60869565
|
|
0.54166667 0.70833333 0.56 0.68181818]
|
|
|
|
mean value: 0.6154038208168643
|
|
|
|
key: train_jcc
|
|
value: [0.76315789 0.72588832 0.71144279 0.77368421 0.695 0.79032258
|
|
0.6969697 0.70707071 0.71287129 0.68811881]
|
|
|
|
mean value: 0.7264526299901373
|
|
|
|
MCC on Blind test: 0.5
|
|
|
|
Accuracy on Blind test: 0.75
|