19759 lines
981 KiB
Text
19759 lines
981 KiB
Text
/home/tanu/git/LSHTM_analysis/scripts/ml/ml_data_cd_8020.py:548: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
mask_check.sort_values(by = ['ligand_distance'], ascending = True, inplace = True)
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/xgboost/compat.py:36: FutureWarning: pandas.Int64Index is deprecated and will be removed from pandas in a future version. Use pandas.Index with the appropriate dtype instead.
|
|
from pandas import MultiIndex, Int64Index
|
|
1.22.4
|
|
1.4.1
|
|
|
|
aaindex_df contains non-numerical data
|
|
|
|
Total no. of non-numerial columns: 2
|
|
|
|
Selecting numerical data only
|
|
|
|
PASS: successfully selected numerical columns only for aaindex_df
|
|
|
|
Now checking for NA in the remaining aaindex_cols
|
|
|
|
Counting aaindex_df cols with NA
|
|
ncols with NA: 4 columns
|
|
Dropping these...
|
|
Original ncols: 127
|
|
|
|
Revised df ncols: 123
|
|
|
|
Checking NA in revised df...
|
|
|
|
PASS: cols with NA successfully dropped from aaindex_df
|
|
Proceeding with combining aa_df with other features_df
|
|
|
|
PASS: ncols match
|
|
Expected ncols: 123
|
|
Got: 123
|
|
|
|
Total no. of columns in clean aa_df: 123
|
|
|
|
Proceeding to merge, expected nrows in merged_df: 858
|
|
|
|
PASS: my_features_df and aa_df successfully combined
|
|
nrows: 858
|
|
ncols: 269
|
|
count of NULL values before imputation
|
|
|
|
or_mychisq 244
|
|
log10_or_mychisq 244
|
|
dtype: int64
|
|
count of NULL values AFTER imputation
|
|
|
|
mutationinformation 0
|
|
or_rawI 0
|
|
logorI 0
|
|
dtype: int64
|
|
|
|
PASS: OR values imputed, data ready for ML
|
|
|
|
Total no. of features for aaindex: 123
|
|
|
|
No. of numerical features: 168
|
|
No. of categorical features: 7
|
|
|
|
PASS: x_features has no target variable
|
|
|
|
No. of columns for x_features: 175
|
|
|
|
-------------------------------------------------------------
|
|
Successfully split data with stratification [COMPLETE data]: 80/20
|
|
Original data size: (858, 175)
|
|
Train data size: (686, 175)
|
|
Test data size: (172, 175)
|
|
y_train numbers: Counter({0: 590, 1: 96})
|
|
y_train ratio: 6.145833333333333
|
|
|
|
y_test_numbers: Counter({0: 148, 1: 24})
|
|
y_test ratio: 6.166666666666667
|
|
-------------------------------------------------------------
|
|
|
|
index: 0
|
|
ind: 1
|
|
|
|
Mask count check: True
|
|
|
|
index: 1
|
|
ind: 2
|
|
|
|
Mask count check: False
|
|
Original Data
|
|
Counter({0: 590, 1: 96}) Data dim: (686, 175)
|
|
|
|
Simple Random OverSampling
|
|
Counter({0: 590, 1: 590})
|
|
(1180, 175)
|
|
|
|
Simple Random UnderSampling
|
|
Counter({0: 96, 1: 96})
|
|
(192, 175)
|
|
|
|
Simple Combined Over and UnderSampling
|
|
Counter({0: 590, 1: 590})
|
|
(1180, 175)
|
|
|
|
SMOTE_NC OverSampling
|
|
Counter({0: 590, 1: 590})
|
|
(1180, 175)
|
|
|
|
#####################################################################
|
|
|
|
Running ML analysis [COMPLETE DATA]: 80/20 split
|
|
Gene name: embB
|
|
Drug name: ethambutol
|
|
|
|
Output directory: /home/tanu/git/Data/ethambutol/output/ml/tts_cd_8020/
|
|
|
|
Sanity checks:
|
|
Total input features: 175
|
|
|
|
Training data size: (686, 175)
|
|
Test data size: (172, 175)
|
|
|
|
Target feature numbers (training data): Counter({0: 590, 1: 96})
|
|
Target features ratio (training data: 6.145833333333333
|
|
|
|
Target feature numbers (test data): Counter({0: 148, 1: 24})
|
|
Target features ratio (test data): 6.166666666666667
|
|
|
|
#####################################################################
|
|
|
|
|
|
================================================================
|
|
|
|
Strucutral features (n): 36
|
|
These are:
|
|
Common stablity features: ['ligand_distance', 'ligand_affinity_change', 'duet_stability_change', 'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'mcsm_ppi2_affinity', 'interface_dist']
|
|
FoldX columns: ['electro_rr', 'electro_mm', 'electro_sm', 'electro_ss', 'disulfide_rr', 'disulfide_mm', 'disulfide_sm', 'disulfide_ss', 'hbonds_rr', 'hbonds_mm', 'hbonds_sm', 'hbonds_ss', 'partcov_rr', 'partcov_mm', 'partcov_sm', 'partcov_ss', 'vdwclashes_rr', 'vdwclashes_mm', 'vdwclashes_sm', 'vdwclashes_ss', 'volumetric_rr', 'volumetric_mm', 'volumetric_ss']
|
|
Other struc columns: ['rsa', 'kd_values', 'rd_values']
|
|
================================================================
|
|
|
|
AAindex features (n): 123
|
|
These are:
|
|
['ALTS910101', 'AZAE970101', 'AZAE970102', 'BASU010101', 'BENS940101', 'BENS940102', 'BENS940103', 'BENS940104', 'BETM990101', 'BLAJ010101', 'BONM030101', 'BONM030102', 'BONM030103', 'BONM030104', 'BONM030105', 'BONM030106', 'BRYS930101', 'CROG050101', 'CSEM940101', 'DAYM780301', 'DAYM780302', 'DOSZ010101', 'DOSZ010102', 'DOSZ010103', 'DOSZ010104', 'FEND850101', 'FITW660101', 'GEOD900101', 'GIAG010101', 'GONG920101', 'GRAR740104', 'HENS920101', 'HENS920102', 'HENS920103', 'HENS920104', 'JOHM930101', 'JOND920103', 'JOND940101', 'KANM000101', 'KAPO950101', 'KESO980101', 'KESO980102', 'KOLA920101', 'KOLA930101', 'KOSJ950100_RSA_SST', 'KOSJ950100_SST', 'KOSJ950110_RSA', 'KOSJ950115', 'LEVJ860101', 'LINK010101', 'LIWA970101', 'LUTR910101', 'LUTR910102', 'LUTR910103', 'LUTR910104', 'LUTR910105', 'LUTR910106', 'LUTR910107', 'LUTR910108', 'LUTR910109', 'MCLA710101', 'MCLA720101', 'MEHP950102', 'MICC010101', 'MIRL960101', 'MIYS850102', 'MIYS850103', 'MIYS930101', 'MIYS960101', 'MIYS960102', 'MIYS960103', 'MIYS990106', 'MIYS990107', 'MIYT790101', 'MOHR870101', 'MOOG990101', 'MUET010101', 'MUET020101', 'MUET020102', 'NAOD960101', 'NGPC000101', 'NIEK910101', 'NIEK910102', 'OGAK980101', 'OVEJ920100_RSA', 'OVEJ920101', 'OVEJ920102', 'OVEJ920103', 'PRLA000101', 'PRLA000102', 'QUIB020101', 'QU_C930101', 'QU_C930102', 'QU_C930103', 'RIER950101', 'RISJ880101', 'RUSR970101', 'RUSR970102', 'RUSR970103', 'SIMK990101', 'SIMK990102', 'SIMK990103', 'SIMK990104', 'SIMK990105', 'SKOJ000101', 'SKOJ000102', 'SKOJ970101', 'TANS760101', 'TANS760102', 'THOP960101', 'TOBD000101', 'TOBD000102', 'TUDE900101', 'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101', 'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106']
|
|
================================================================
|
|
|
|
Evolutionary features (n): 3
|
|
These are:
|
|
['consurf_score', 'snap2_score', 'provean_score']
|
|
================================================================
|
|
|
|
Genomic features (n): 6
|
|
These are:
|
|
['maf', 'logorI']
|
|
['lineage_proportion', 'dist_lineage_proportion', 'lineage_count_all', 'lineage_count_unique']
|
|
================================================================
|
|
|
|
Categorical features (n): 7
|
|
These are:
|
|
['ss_class', 'aa_prop_change', 'electrostatics_change', 'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site']
|
|
================================================================
|
|
|
|
|
|
Pass: No. of features match
|
|
|
|
#####################################################################
|
|
|
|
|
|
Model_name: Logistic Regression
|
|
Model func: LogisticRegression(random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegression(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.04393697 0.03901267 0.04567313 0.04052663 0.04138112 0.04499602
|
|
0.04322577 0.04333091 0.04056883 0.0488832 ]
|
|
|
|
mean value: 0.04315352439880371
|
|
|
|
key: score_time
|
|
value: [0.01266527 0.01229858 0.01230812 0.01228571 0.01538992 0.01530671
|
|
0.01535559 0.0154953 0.01550841 0.01525068]
|
|
|
|
mean value: 0.014186429977416991
|
|
|
|
key: test_mcc
|
|
value: [0.36132554 0.52012466 0.31598405 0.67892378 0.38302888 0.16516678
|
|
0.55503877 0.33873645 0.64009548 0.55005827]
|
|
|
|
mean value: 0.45084826479846213
|
|
|
|
key: train_mcc
|
|
value: [0.68068968 0.69660244 0.66152308 0.62052467 0.66238289 0.70362551
|
|
0.65019332 0.62669606 0.60157044 0.67413466]
|
|
|
|
mean value: 0.6577942760752971
|
|
|
|
key: test_accuracy
|
|
value: [0.86956522 0.89855072 0.86956522 0.92753623 0.84057971 0.82608696
|
|
0.91176471 0.88235294 0.92647059 0.91176471]
|
|
|
|
mean value: 0.8864236999147485
|
|
|
|
key: train_accuracy
|
|
value: [0.93030794 0.93354943 0.92706645 0.91896272 0.92706645 0.93517018
|
|
0.92394822 0.91909385 0.91423948 0.92880259]
|
|
|
|
mean value: 0.9258207319056087
|
|
|
|
key: test_fscore
|
|
value: [0.4 0.53333333 0.30769231 0.66666667 0.47619048 0.25
|
|
0.57142857 0.33333333 0.61538462 0.5 ]
|
|
|
|
mean value: 0.4654029304029304
|
|
|
|
key: train_fscore
|
|
value: [0.70344828 0.71724138 0.67625899 0.64285714 0.68085106 0.71830986
|
|
0.67132867 0.65277778 0.62937063 0.69014085]
|
|
|
|
mean value: 0.678258463736753
|
|
|
|
key: test_precision
|
|
value: [0.6 0.8 0.66666667 1. 0.45454545 0.33333333
|
|
0.8 0.66666667 1. 1. ]
|
|
|
|
mean value: 0.7321212121212122
|
|
|
|
key: train_precision
|
|
value: [0.86440678 0.88135593 0.88679245 0.83333333 0.87272727 0.91071429
|
|
0.85714286 0.8245614 0.80357143 0.89090909]
|
|
|
|
mean value: 0.8625514836601635
|
|
|
|
key: test_recall
|
|
value: [0.3 0.4 0.2 0.5 0.5 0.2
|
|
0.44444444 0.22222222 0.44444444 0.33333333]
|
|
|
|
mean value: 0.35444444444444445
|
|
|
|
key: train_recall
|
|
value: [0.59302326 0.60465116 0.54651163 0.52325581 0.55813953 0.59302326
|
|
0.55172414 0.54022989 0.51724138 0.56321839]
|
|
|
|
mean value: 0.5591018444266239
|
|
|
|
key: test_roc_auc
|
|
value: [0.63305085 0.69152542 0.59152542 0.75 0.69915254 0.56610169
|
|
0.71374765 0.60263653 0.72222222 0.66666667]
|
|
|
|
mean value: 0.663662900188324
|
|
|
|
key: train_roc_auc
|
|
value: [0.78897867 0.79573424 0.7676061 0.75315333 0.77247843 0.79180353
|
|
0.76832911 0.76069875 0.74826287 0.77595948]
|
|
|
|
mean value: 0.7723004514035191
|
|
|
|
key: test_jcc
|
|
value: [0.25 0.36363636 0.18181818 0.5 0.3125 0.14285714
|
|
0.4 0.2 0.44444444 0.33333333]
|
|
|
|
mean value: 0.3128589466089466
|
|
|
|
key: train_jcc
|
|
value: [0.54255319 0.55913978 0.51086957 0.47368421 0.51612903 0.56043956
|
|
0.50526316 0.48453608 0.45918367 0.52688172]
|
|
|
|
mean value: 0.513867997914539
|
|
|
|
MCC on Blind test: 0.69
|
|
|
|
Accuracy on Blind test: 0.93
|
|
|
|
Model_name: Logistic RegressionCV
|
|
Model func: LogisticRegressionCV(random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegressionCV(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.32384491 0.91910911 1.02646565 1.02961421 1.20225215 1.10890198
|
|
2.41662621 1.12872148 0.91919041 1.21219325]
|
|
|
|
mean value: 1.2286919355392456
|
|
|
|
key: score_time
|
|
value: [0.01479459 0.01542282 0.01636267 0.0152626 0.0156219 0.01246262
|
|
0.01546311 0.01255488 0.01579666 0.01670265]
|
|
|
|
mean value: 0.015044450759887695
|
|
|
|
key: test_mcc
|
|
value: [0.54345279 0.52012466 0.52012466 0.81616728 0.49549267 0.2707383
|
|
0.64349815 0.33873645 0.64009548 0.55005827]
|
|
|
|
mean value: 0.5338488716026029
|
|
|
|
key: train_mcc
|
|
value: [0.74694341 0.74597919 0.7125152 0.69660244 0.73712542 0.76816022
|
|
0.72397767 0.52931113 0.72567119 0.71582958]
|
|
|
|
mean value: 0.7102115468750598
|
|
|
|
key: test_accuracy
|
|
value: [0.89855072 0.89855072 0.89855072 0.95652174 0.86956522 0.84057971
|
|
0.92647059 0.88235294 0.92647059 0.91176471]
|
|
|
|
mean value: 0.9009377664109122
|
|
|
|
key: train_accuracy
|
|
value: [0.94327391 0.94327391 0.93679092 0.93354943 0.94165316 0.94813614
|
|
0.93851133 0.90291262 0.93851133 0.9368932 ]
|
|
|
|
mean value: 0.9363505950601354
|
|
|
|
key: test_fscore
|
|
value: [0.58823529 0.53333333 0.53333333 0.82352941 0.57142857 0.35294118
|
|
0.66666667 0.33333333 0.61538462 0.5 ]
|
|
|
|
mean value: 0.5518185735832795
|
|
|
|
key: train_fscore
|
|
value: [0.77124183 0.76821192 0.73103448 0.71724138 0.75675676 0.78082192
|
|
0.74324324 0.53846154 0.75 0.73469388]
|
|
|
|
mean value: 0.7291706946484904
|
|
|
|
key: test_precision
|
|
value: [0.71428571 0.8 0.8 1. 0.54545455 0.42857143
|
|
0.83333333 0.66666667 1. 1. ]
|
|
|
|
mean value: 0.7788311688311689
|
|
|
|
key: train_precision
|
|
value: [0.88059701 0.89230769 0.89830508 0.88135593 0.90322581 0.95
|
|
0.90163934 0.81395349 0.87692308 0.9 ]
|
|
|
|
mean value: 0.8898307440191295
|
|
|
|
key: test_recall
|
|
value: [0.5 0.4 0.4 0.7 0.6 0.3
|
|
0.55555556 0.22222222 0.44444444 0.33333333]
|
|
|
|
mean value: 0.44555555555555554
|
|
|
|
key: train_recall
|
|
value: [0.68604651 0.6744186 0.61627907 0.60465116 0.65116279 0.6627907
|
|
0.63218391 0.40229885 0.65517241 0.62068966]
|
|
|
|
mean value: 0.620569366479551
|
|
|
|
key: test_roc_auc
|
|
value: [0.73305085 0.69152542 0.69152542 0.85 0.75762712 0.61610169
|
|
0.7693032 0.60263653 0.72222222 0.66666667]
|
|
|
|
mean value: 0.7100659133709981
|
|
|
|
key: train_roc_auc
|
|
value: [0.8354903 0.83061797 0.80248982 0.79573424 0.81993168 0.82857049
|
|
0.81044224 0.69361647 0.82005325 0.80469511]
|
|
|
|
mean value: 0.8041641559328074
|
|
|
|
key: test_jcc
|
|
value: [0.41666667 0.36363636 0.36363636 0.7 0.4 0.21428571
|
|
0.5 0.2 0.44444444 0.33333333]
|
|
|
|
mean value: 0.3936002886002886
|
|
|
|
key: train_jcc
|
|
value: [0.62765957 0.62365591 0.57608696 0.55913978 0.60869565 0.64044944
|
|
0.59139785 0.36842105 0.6 0.58064516]
|
|
|
|
mean value: 0.5776151383674982
|
|
|
|
MCC on Blind test: 0.69
|
|
|
|
Accuracy on Blind test: 0.93
|
|
|
|
Model_name: Gaussian NB
|
|
Model func: GaussianNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01712132 0.01361442 0.01096654 0.01423478 0.0155673 0.01126385
|
|
0.01666474 0.01238942 0.01196647 0.01603627]
|
|
|
|
mean value: 0.013982510566711426
|
|
|
|
key: score_time
|
|
value: [0.01399755 0.00995302 0.01040149 0.01460528 0.00965667 0.00996089
|
|
0.01111722 0.01012516 0.00923634 0.015414 ]
|
|
|
|
mean value: 0.011446762084960937
|
|
|
|
key: test_mcc
|
|
value: [0.27056508 0.41525424 0.60795647 0.49549267 0.48169897 0.38188747
|
|
0.47227255 0.14119489 0.38837931 0.33707477]
|
|
|
|
mean value: 0.39917764285215457
|
|
|
|
key: train_mcc
|
|
value: [0.53711401 0.54553606 0.52218082 0.52540089 0.50422978 0.49504965
|
|
0.48152774 0.51790862 0.51115371 0.51202848]
|
|
|
|
mean value: 0.5152129748873536
|
|
|
|
key: test_accuracy
|
|
value: [0.8115942 0.85507246 0.89855072 0.86956522 0.84057971 0.8115942
|
|
0.85294118 0.76470588 0.83823529 0.76470588]
|
|
|
|
mean value: 0.8307544757033248
|
|
|
|
key: train_accuracy
|
|
value: [0.86547812 0.86709887 0.86061588 0.86223663 0.8541329 0.84927066
|
|
0.84304207 0.85113269 0.87702265 0.84789644]
|
|
|
|
mean value: 0.8577926914341762
|
|
|
|
key: test_fscore
|
|
value: [0.38095238 0.5 0.66666667 0.57142857 0.56 0.48
|
|
0.54545455 0.27272727 0.47619048 0.42857143]
|
|
|
|
mean value: 0.4881991341991342
|
|
|
|
key: train_fscore
|
|
value: [0.60287081 0.60952381 0.59047619 0.59330144 0.5754717 0.56744186
|
|
0.55707763 0.58558559 0.58241758 0.58035714]
|
|
|
|
mean value: 0.5844523743813239
|
|
|
|
key: test_precision
|
|
value: [0.36363636 0.5 0.63636364 0.54545455 0.46666667 0.4
|
|
0.46153846 0.23076923 0.41666667 0.31578947]
|
|
|
|
mean value: 0.4336885044779782
|
|
|
|
key: train_precision
|
|
value: [0.51219512 0.51612903 0.5 0.50406504 0.48412698 0.47286822
|
|
0.46212121 0.48148148 0.55789474 0.47445255]
|
|
|
|
mean value: 0.49653343812302625
|
|
|
|
key: test_recall
|
|
value: [0.4 0.5 0.7 0.6 0.7 0.6
|
|
0.66666667 0.33333333 0.55555556 0.66666667]
|
|
|
|
mean value: 0.5722222222222222
|
|
|
|
key: train_recall
|
|
value: [0.73255814 0.74418605 0.72093023 0.72093023 0.70930233 0.70930233
|
|
0.70114943 0.74712644 0.6091954 0.74712644]
|
|
|
|
mean value: 0.7141807003475007
|
|
|
|
key: test_roc_auc
|
|
value: [0.64067797 0.70762712 0.81610169 0.75762712 0.78220339 0.72372881
|
|
0.7740113 0.5819209 0.71845574 0.72316384]
|
|
|
|
mean value: 0.7225517890772127
|
|
|
|
key: train_roc_auc
|
|
value: [0.80978189 0.81559585 0.8020847 0.80302632 0.79344589 0.79062103
|
|
0.78371972 0.80764985 0.76504968 0.80576661]
|
|
|
|
mean value: 0.7976741543168765
|
|
|
|
key: test_jcc
|
|
value: [0.23529412 0.33333333 0.5 0.4 0.38888889 0.31578947
|
|
0.375 0.15789474 0.3125 0.27272727]
|
|
|
|
mean value: 0.32914278231228694
|
|
|
|
key: train_jcc
|
|
value: [0.43150685 0.43835616 0.41891892 0.42176871 0.40397351 0.3961039
|
|
0.38607595 0.41401274 0.41085271 0.40880503]
|
|
|
|
mean value: 0.41303744789836405
|
|
|
|
MCC on Blind test: 0.61
|
|
|
|
Accuracy on Blind test: 0.88
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01327634 0.01187444 0.01114702 0.01590014 0.01698303 0.01133466
|
|
0.01096249 0.01086736 0.01096678 0.01408648]
|
|
|
|
mean value: 0.012739872932434082
|
|
|
|
key: score_time
|
|
value: [0.01134276 0.00931382 0.00909352 0.0146997 0.01186562 0.00923777
|
|
0.00935602 0.00927663 0.00955176 0.01573372]
|
|
|
|
mean value: 0.010947132110595703
|
|
|
|
key: test_mcc
|
|
value: [ 0.07404322 0.40709555 0.04372729 -0.12705137 0.04372729 0.40709555
|
|
0.22250453 0.15330689 0.2712269 0.38877165]
|
|
|
|
mean value: 0.18844474963906296
|
|
|
|
key: train_mcc
|
|
value: [0.36745343 0.29282032 0.26746977 0.35557854 0.289342 0.31988206
|
|
0.24930053 0.35763321 0.29630871 0.29597565]
|
|
|
|
mean value: 0.30917642231712034
|
|
|
|
key: test_accuracy
|
|
value: [0.82608696 0.86956522 0.8115942 0.76811594 0.8115942 0.86956522
|
|
0.85294118 0.82352941 0.86764706 0.88235294]
|
|
|
|
mean value: 0.8382992327365729
|
|
|
|
key: train_accuracy
|
|
value: [0.86709887 0.84764992 0.8541329 0.86709887 0.8541329 0.86385737
|
|
0.84466019 0.86893204 0.85598706 0.85113269]
|
|
|
|
mean value: 0.857468280069026
|
|
|
|
key: test_fscore
|
|
value: [0.14285714 0.47058824 0.13333333 0. 0.13333333 0.47058824
|
|
0.28571429 0.25 0.30769231 0.42857143]
|
|
|
|
mean value: 0.2622678302090067
|
|
|
|
key: train_fscore
|
|
value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
[0.43055556 0.37333333 0.32835821 0.41428571 0.35714286 0.37313433
|
|
0.32394366 0.40875912 0.35971223 0.36986301]
|
|
|
|
mean value: 0.3739088027604773
|
|
|
|
key: test_precision
|
|
value: [0.25 0.57142857 0.2 0. 0.2 0.57142857
|
|
0.4 0.28571429 0.5 0.6 ]
|
|
|
|
mean value: 0.3578571428571429
|
|
|
|
key: train_precision
|
|
value: [0.53448276 0.4375 0.45833333 0.53703704 0.46296296 0.52083333
|
|
0.41818182 0.56 0.48076923 0.45762712]
|
|
|
|
mean value: 0.4867727592882473
|
|
|
|
key: test_recall
|
|
value: [0.1 0.4 0.1 0. 0.1 0.4
|
|
0.22222222 0.22222222 0.22222222 0.33333333]
|
|
|
|
mean value: 0.21
|
|
|
|
key: train_recall
|
|
value: [0.36046512 0.3255814 0.25581395 0.3372093 0.29069767 0.29069767
|
|
0.26436782 0.32183908 0.28735632 0.31034483]
|
|
|
|
mean value: 0.3044373162256081
|
|
|
|
key: test_roc_auc
|
|
value: [0.52457627 0.67457627 0.51610169 0.44915254 0.51610169 0.67457627
|
|
0.58568738 0.56873823 0.59416196 0.64971751]
|
|
|
|
mean value: 0.5753389830508474
|
|
|
|
key: train_roc_auc
|
|
value: [0.65480883 0.62889239 0.60342487 0.64506416 0.61804187 0.62369159
|
|
0.60205208 0.64020391 0.61825443 0.62504059]
|
|
|
|
mean value: 0.625947471672126
|
|
|
|
key: test_jcc
|
|
value: [0.07692308 0.30769231 0.07142857 0. 0.07142857 0.30769231
|
|
0.16666667 0.14285714 0.18181818 0.27272727]
|
|
|
|
mean value: 0.15992340992340992
|
|
|
|
key: train_jcc
|
|
value: [0.27433628 0.2295082 0.19642857 0.26126126 0.2173913 0.2293578
|
|
0.19327731 0.25688073 0.21929825 0.22689076]
|
|
|
|
mean value: 0.23046304618958285
|
|
|
|
MCC on Blind test: 0.15
|
|
|
|
Accuracy on Blind test: 0.84
|
|
|
|
Model_name: K-Nearest Neighbors
|
|
Model func: KNeighborsClassifier()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', KNeighborsClassifier())])
|
|
|
|
key: fit_time
|
|
value: [0.01050663 0.01416564 0.01328468 0.01300573 0.01122046 0.01673555
|
|
0.01188731 0.01215339 0.01065969 0.01099586]
|
|
|
|
mean value: 0.012461495399475098
|
|
|
|
key: score_time
|
|
value: [0.08505559 0.02263618 0.02697301 0.02342725 0.01823258 0.02503514
|
|
0.01838303 0.05894256 0.02018142 0.02197146]
|
|
|
|
mean value: 0.03208382129669189
|
|
|
|
key: test_mcc
|
|
value: [ 0. 0.36132554 0. 0.17426801 0.11410535 0.
|
|
0. 0. 0. -0.06798894]
|
|
|
|
mean value: 0.058170995704217
|
|
|
|
key: train_mcc
|
|
value: [0.48550202 0.33249557 0.41481175 0.35162571 0.37513356 0.44005783
|
|
0.41199177 0.40026653 0.35861184 0.46114921]
|
|
|
|
mean value: 0.40316457878984924
|
|
|
|
key: test_accuracy
|
|
value: [0.85507246 0.86956522 0.85507246 0.85507246 0.84057971 0.85507246
|
|
0.86764706 0.86764706 0.86764706 0.83823529]
|
|
|
|
mean value: 0.8571611253196931
|
|
|
|
key: train_accuracy
|
|
value: [0.89789303 0.87844408 0.88816856 0.88006483 0.88330632 0.89141005
|
|
0.88673139 0.88511327 0.8802589 0.89320388]
|
|
|
|
mean value: 0.8864594315326798
|
|
|
|
key: test_fscore
|
|
value: [0. 0.4 0. 0.16666667 0.15384615 0.
|
|
0. 0. 0. 0. ]
|
|
|
|
mean value: 0.07205128205128206
|
|
|
|
key: train_fscore
|
|
value: [0.45217391 0.27184466 0.35514019 0.31481481 0.30769231 0.40707965
|
|
0.36363636 0.36036036 0.28846154 0.4 ]
|
|
|
|
mean value: 0.35212037911366245
|
|
|
|
key: test_precision
|
|
value: [0. 0.6 0. 0.5 0.33333333 0.
|
|
0. 0. 0. 0. ]
|
|
|
|
mean value: 0.14333333333333334
|
|
|
|
key: train_precision
|
|
value: [0.89655172 0.82352941 0.9047619 0.77272727 0.88888889 0.85185185
|
|
0.86956522 0.83333333 0.88235294 0.95652174]
|
|
|
|
mean value: 0.8680084285164098
|
|
|
|
key: test_recall
|
|
value: [0. 0.3 0. 0.1 0.1 0. 0. 0. 0. 0. ]
|
|
|
|
mean value: 0.05
|
|
|
|
key: train_recall
|
|
value: [0.30232558 0.1627907 0.22093023 0.19767442 0.18604651 0.26744186
|
|
0.22988506 0.22988506 0.17241379 0.25287356]
|
|
|
|
mean value: 0.22222667735899493
|
|
|
|
key: test_roc_auc
|
|
value: [0.5 0.63305085 0.5 0.54152542 0.53305085 0.5
|
|
0.5 0.5 0.5 0.48305085]
|
|
|
|
mean value: 0.5190677966101696
|
|
|
|
key: train_roc_auc
|
|
value: [0.64833793 0.57857049 0.60858188 0.59412911 0.59114002 0.62995445
|
|
0.61211767 0.61117605 0.58432366 0.62549516]
|
|
|
|
mean value: 0.6083826418810041
|
|
|
|
key: test_jcc
|
|
value: [0. 0.25 0. 0.09090909 0.08333333 0.
|
|
0. 0. 0. 0. ]
|
|
|
|
mean value: 0.04242424242424243
|
|
|
|
key: train_jcc
|
|
value: [0.29213483 0.15730337 0.21590909 0.18681319 0.18181818 0.25555556
|
|
0.22222222 0.21978022 0.16853933 0.25 ]
|
|
|
|
mean value: 0.21500759851883447
|
|
|
|
MCC on Blind test: 0.01
|
|
|
|
Accuracy on Blind test: 0.84
|
|
|
|
Model_name: SVM
|
|
Model func: SVC(random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SVC(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02781272 0.03568673 0.02287269 0.02391934 0.02124023 0.02135539
|
|
0.02222252 0.0265336 0.02634096 0.02610207]
|
|
|
|
mean value: 0.025408625602722168
|
|
|
|
key: score_time
|
|
value: [0.01419449 0.0132699 0.01211572 0.01240897 0.01170635 0.01184607
|
|
0.01185584 0.013304 0.01356554 0.01337123]
|
|
|
|
mean value: 0.01276381015777588
|
|
|
|
key: test_mcc
|
|
value: [ 0. 0.29455849 0. 0. -0.0711298 0.
|
|
0. 0. 0. 0. ]
|
|
|
|
mean value: 0.022342869263583545
|
|
|
|
key: train_mcc
|
|
value: [0.37861987 0.31893611 0.34995401 0.33477889 0.36454508 0.39223416
|
|
0.28292265 0.38964993 0.33257502 0.29738732]
|
|
|
|
mean value: 0.3441603041483397
|
|
|
|
key: test_accuracy
|
|
value: [0.85507246 0.86956522 0.85507246 0.85507246 0.82608696 0.85507246
|
|
0.86764706 0.86764706 0.86764706 0.86764706]
|
|
|
|
mean value: 0.8586530264279625
|
|
|
|
key: train_accuracy
|
|
value: [0.88330632 0.87682334 0.88006483 0.87844408 0.88168558 0.88492707
|
|
0.87216828 0.88349515 0.87702265 0.87378641]
|
|
|
|
mean value: 0.8791723707468543
|
|
|
|
key: test_fscore
|
|
value: [0. 0.18181818 0. 0. 0. 0.
|
|
0. 0. 0. 0. ]
|
|
|
|
mean value: 0.01818181818181818
|
|
|
|
key: train_fscore
|
|
value: [0.28 0.20833333 0.24489796 0.22680412 0.26262626 0.2970297
|
|
0.16842105 0.29411765 0.2244898 0.20408163]
|
|
|
|
mean value: 0.24108015100867378
|
|
|
|
key: test_precision
|
|
value: [0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]
|
|
|
|
mean value: 0.1
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1.
|
|
1. 1. 1. 0.90909091]
|
|
|
|
mean value: 0.990909090909091
|
|
|
|
key: test_recall
|
|
value: [0. 0.1 0. 0. 0. 0. 0. 0. 0. 0. ]
|
|
|
|
mean value: 0.01
|
|
|
|
key: train_recall
|
|
value: [0.1627907 0.11627907 0.13953488 0.12790698 0.15116279 0.1744186
|
|
0.09195402 0.17241379 0.12643678 0.11494253]
|
|
|
|
mean value: 0.13778401496925957
|
|
|
|
key: test_roc_auc
|
|
value: [0.5 0.55 0.5 0.5 0.48305085 0.5
|
|
0.5 0.5 0.5 0.5 ]
|
|
|
|
mean value: 0.5033050847457627
|
|
|
|
key: train_roc_auc
|
|
value: [0.58139535 0.55813953 0.56976744 0.56395349 0.5755814 0.5872093
|
|
0.54597701 0.5862069 0.56321839 0.55652964]
|
|
|
|
mean value: 0.568797845526061
|
|
|
|
key: test_jcc
|
|
value: [0. 0.1 0. 0. 0. 0. 0. 0. 0. 0. ]
|
|
|
|
mean value: 0.01
|
|
|
|
key: train_jcc
|
|
value: [0.1627907 0.11627907 0.13953488 0.12790698 0.15116279 0.1744186
|
|
0.09195402 0.17241379 0.12643678 0.11363636]
|
|
|
|
mean value: 0.1376533984593327
|
|
|
|
MCC on Blind test: -0.03
|
|
|
|
Accuracy on Blind test: 0.85
|
|
|
|
Model_name: MLP
|
|
Model func: MLPClassifier(max_iter=500, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MLPClassifier(max_iter=500, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [4.26158357 1.45655727 3.64864111 3.52746892 0.9711771 2.40774035
|
|
0.71744657 1.178128 1.99247479 1.22254539]
|
|
|
|
mean value: 2.138376307487488
|
|
|
|
key: score_time
|
|
value: [0.01353788 0.01282167 0.01258874 0.01611829 0.01909518 0.01284266
|
|
0.01255178 0.01317525 0.012532 0.0124979 ]
|
|
|
|
mean value: 0.013776135444641114
|
|
|
|
key: test_mcc
|
|
value: [0.36527517 0.38302888 0.52012466 0.51786274 0.45664488 0.13438105
|
|
0.38877165 0.55503877 0.55503877 0.55503877]
|
|
|
|
mean value: 0.4431205338549038
|
|
|
|
key: train_mcc
|
|
value: [0.95296181 0.68547465 0.79942949 0.81494992 0.66752734 0.83088349
|
|
0.61474702 0.65885434 0.77116828 0.74288482]
|
|
|
|
mean value: 0.7538881160241437
|
|
|
|
key: test_accuracy
|
|
value: [0.85507246 0.84057971 0.89855072 0.89855072 0.82608696 0.8115942
|
|
0.88235294 0.91176471 0.91176471 0.91176471]
|
|
|
|
mean value: 0.8748081841432225
|
|
|
|
key: train_accuracy
|
|
value: [0.98865478 0.92220421 0.95461912 0.95786062 0.91734198 0.96110211
|
|
0.91747573 0.92556634 0.94822006 0.94174757]
|
|
|
|
mean value: 0.9434792528835109
|
|
|
|
key: test_fscore
|
|
value: [0.44444444 0.47619048 0.53333333 0.46153846 0.53846154 0.23529412
|
|
0.42857143 0.57142857 0.57142857 0.57142857]
|
|
|
|
mean value: 0.48321195144724555
|
|
|
|
key: train_fscore
|
|
value: [0.95953757 0.73033708 0.81578947 0.82432432 0.7150838 0.85
|
|
0.63309353 0.68055556 0.78947368 0.76923077]
|
|
|
|
mean value: 0.7767425781973945
|
|
|
|
key: test_precision
|
|
value: [0.5 0.45454545 0.8 1. 0.4375 0.28571429
|
|
0.6 0.8 0.8 0.8 ]
|
|
|
|
mean value: 0.6477759740259741
|
|
|
|
key: train_precision
|
|
value: [0.95402299 0.70652174 0.93939394 0.98387097 0.68817204 0.91891892
|
|
0.84615385 0.85964912 0.92307692 0.86956522]
|
|
|
|
mean value: 0.868934570613082
|
|
|
|
key: test_recall
|
|
value: [0.4 0.5 0.4 0.3 0.7 0.2
|
|
0.33333333 0.44444444 0.44444444 0.44444444]
|
|
|
|
mean value: 0.41666666666666663
|
|
|
|
key: train_recall
|
|
value: [0.96511628 0.75581395 0.72093023 0.70930233 0.74418605 0.79069767
|
|
0.50574713 0.56321839 0.68965517 0.68965517]
|
|
|
|
mean value: 0.7134322373696873
|
|
|
|
key: test_roc_auc
|
|
value: [0.66610169 0.69915254 0.69152542 0.65 0.77372881 0.55762712
|
|
0.64971751 0.71374765 0.71374765 0.71374765]
|
|
|
|
mean value: 0.682909604519774
|
|
|
|
key: train_roc_auc
|
|
value: [0.97879166 0.85248325 0.85669864 0.85370954 0.84478606 0.88969912
|
|
0.74534061 0.77407624 0.84011949 0.83635301]
|
|
|
|
mean value: 0.847205760869401
|
|
|
|
key: test_jcc
|
|
value: [0.28571429 0.3125 0.36363636 0.3 0.36842105 0.13333333
|
|
0.27272727 0.4 0.4 0.4 ]
|
|
|
|
mean value: 0.3236332308042834
|
|
|
|
key: train_jcc
|
|
value: [0.92222222 0.57522124 0.68888889 0.70114943 0.55652174 0.73913043
|
|
0.46315789 0.51578947 0.65217391 0.625 ]
|
|
|
|
mean value: 0.6439255230714095
|
|
|
|
MCC on Blind test: 0.69
|
|
|
|
Accuracy on Blind test: 0.93
|
|
|
|
Model_name: Decision Tree
|
|
Model func: DecisionTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', DecisionTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.04723763 0.03345871 0.03431988 0.03432274 0.03430414 0.03437066
|
|
0.03654885 0.03251791 0.03627992 0.03249931]
|
|
|
|
mean value: 0.035585975646972655
|
|
|
|
key: score_time
|
|
value: [0.00952077 0.00907564 0.00997496 0.00915289 0.00922751 0.00911045
|
|
0.00916862 0.00950861 0.00921798 0.00913358]
|
|
|
|
mean value: 0.009309101104736327
|
|
|
|
key: test_mcc
|
|
value: [0.67981004 0.38302888 0.53220339 0.54345279 0.46279992 0.57141623
|
|
0.61604994 0.61581921 0.53552216 0.53552216]
|
|
|
|
mean value: 0.5475624714236299
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.92753623 0.84057971 0.88405797 0.89855072 0.85507246 0.88405797
|
|
0.89705882 0.91176471 0.88235294 0.88235294]
|
|
|
|
mean value: 0.8863384484228474
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.70588235 0.47619048 0.6 0.58823529 0.54545455 0.63636364
|
|
0.66666667 0.66666667 0.6 0.6 ]
|
|
|
|
mean value: 0.6085459638400814
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.85714286 0.45454545 0.6 0.71428571 0.5 0.58333333
|
|
0.58333333 0.66666667 0.54545455 0.54545455]
|
|
|
|
mean value: 0.605021645021645
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.6 0.5 0.6 0.5 0.6 0.7
|
|
0.77777778 0.66666667 0.66666667 0.66666667]
|
|
|
|
mean value: 0.6277777777777778
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.79152542 0.69915254 0.76610169 0.73305085 0.74915254 0.80762712
|
|
0.84651601 0.8079096 0.79096045 0.79096045]
|
|
|
|
mean value: 0.7782956685499058
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.54545455 0.3125 0.42857143 0.41666667 0.375 0.46666667
|
|
0.5 0.5 0.42857143 0.42857143]
|
|
|
|
mean value: 0.44020021645021645
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.62
|
|
|
|
Accuracy on Blind test: 0.91
|
|
|
|
Model_name: Extra Trees
|
|
Model func: ExtraTreesClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreesClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.13394237 0.14602733 0.14849257 0.13794088 0.13552451 0.13577223
|
|
0.15195465 0.14996815 0.13749766 0.13716388]
|
|
|
|
mean value: 0.14142842292785646
|
|
|
|
key: score_time
|
|
value: [0.01836276 0.02015781 0.02024364 0.01805043 0.01789689 0.01910591
|
|
0.02044892 0.0197947 0.01975703 0.01919222]
|
|
|
|
mean value: 0.01930103302001953
|
|
|
|
key: test_mcc
|
|
value: [0.31598405 0.60255865 0.41966582 0.29455849 0.11410535 0.20252642
|
|
0.33873645 0.44570528 0.55005827 0.44570528]
|
|
|
|
mean value: 0.3729604060375398
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.86956522 0.91304348 0.88405797 0.86956522 0.84057971 0.84057971
|
|
0.88235294 0.89705882 0.91176471 0.89705882]
|
|
|
|
mean value: 0.8805626598465472
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.30769231 0.57142857 0.33333333 0.18181818 0.15384615 0.26666667
|
|
0.33333333 0.36363636 0.5 0.36363636]
|
|
|
|
mean value: 0.3375391275391275
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.66666667 1. 1. 1. 0.33333333 0.4
|
|
0.66666667 1. 1. 1. ]
|
|
|
|
mean value: 0.8066666666666666
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.2 0.4 0.2 0.1 0.1 0.2
|
|
0.22222222 0.22222222 0.33333333 0.22222222]
|
|
|
|
mean value: 0.22
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.59152542 0.7 0.6 0.55 0.53305085 0.57457627
|
|
0.60263653 0.61111111 0.66666667 0.61111111]
|
|
|
|
mean value: 0.6040677966101695
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.18181818 0.4 0.2 0.1 0.08333333 0.15384615
|
|
0.2 0.22222222 0.33333333 0.22222222]
|
|
|
|
mean value: 0.20967754467754468
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.44
|
|
|
|
Accuracy on Blind test: 0.89
|
|
|
|
Model_name: Extra Tree
|
|
Model func: ExtraTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01226044 0.01089144 0.01168466 0.01144099 0.0110755 0.01078558
|
|
0.01158714 0.01159191 0.01107836 0.01313829]
|
|
|
|
mean value: 0.011553430557250976
|
|
|
|
key: score_time
|
|
value: [0.00907612 0.00888062 0.00909138 0.00884104 0.0088594 0.00893164
|
|
0.00948095 0.00896335 0.00907302 0.01005626]
|
|
|
|
mean value: 0.009125375747680664
|
|
|
|
key: test_mcc
|
|
value: [ 0.32952628 0.35418361 0.2366834 0.45738492 0.43333787 -0.02050014
|
|
0.41767262 0.29982309 0.03149991 0.23163842]
|
|
|
|
mean value: 0.2771249992545024
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.84057971 0.82608696 0.82608696 0.88405797 0.84057971 0.76811594
|
|
0.85294118 0.82352941 0.80882353 0.82352941]
|
|
|
|
mean value: 0.8294330775788576
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.42105263 0.45454545 0.33333333 0.5 0.52173913 0.11111111
|
|
0.5 0.4 0.13333333 0.33333333]
|
|
|
|
mean value: 0.37084483276702956
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.44444444 0.41666667 0.375 0.66666667 0.46153846 0.125
|
|
0.45454545 0.36363636 0.16666667 0.33333333]
|
|
|
|
mean value: 0.38074980574980577
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.4 0.5 0.3 0.4 0.6 0.1
|
|
0.55555556 0.44444444 0.11111111 0.33333333]
|
|
|
|
mean value: 0.37444444444444447
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.65762712 0.69067797 0.60762712 0.68305085 0.74067797 0.49067797
|
|
0.72693032 0.66290019 0.51318267 0.61581921]
|
|
|
|
mean value: 0.6389171374764595
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.26666667 0.29411765 0.2 0.33333333 0.35294118 0.05882353
|
|
0.33333333 0.25 0.07142857 0.2 ]
|
|
|
|
mean value: 0.23606442577030812
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.33
|
|
|
|
Accuracy on Blind test: 0.85
|
|
|
|
Model_name: Random Forest
|
|
Model func: RandomForestClassifier(n_estimators=1000, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(n_estimators=1000, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [2.23920155 2.16665769 2.27117991 2.24015403 2.28420401 2.1871779
|
|
2.18198299 2.18930173 2.23160648 2.13744617]
|
|
|
|
mean value: 2.2128912448883056
|
|
|
|
key: score_time
|
|
value: [0.10220766 0.10142517 0.10408783 0.09963489 0.10058045 0.09878421
|
|
0.09835505 0.09334397 0.09846711 0.09762025]
|
|
|
|
mean value: 0.09945065975189209
|
|
|
|
key: test_mcc
|
|
value: [0.60255865 0.60255865 0.41966582 0.41966582 0.2707383 0.16516678
|
|
0.45566119 0.64009548 0.64009548 0.55005827]
|
|
|
|
mean value: 0.47662644357710093
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.91304348 0.91304348 0.88405797 0.88405797 0.84057971 0.82608696
|
|
0.89705882 0.92647059 0.92647059 0.91176471]
|
|
|
|
mean value: 0.8922634271099744
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.57142857 0.57142857 0.33333333 0.33333333 0.35294118 0.25
|
|
0.46153846 0.61538462 0.61538462 0.5 ]
|
|
|
|
mean value: 0.46047726783020904
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 1. 1. 1. 0.42857143 0.33333333
|
|
0.75 1. 1. 1. ]
|
|
|
|
mean value: 0.8511904761904762
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.4 0.4 0.2 0.2 0.3 0.2
|
|
0.33333333 0.44444444 0.44444444 0.33333333]
|
|
|
|
mean value: 0.32555555555555554
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.7 0.7 0.6 0.6 0.61610169 0.56610169
|
|
0.65819209 0.72222222 0.72222222 0.66666667]
|
|
|
|
mean value: 0.65515065913371
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.4 0.4 0.2 0.2 0.21428571 0.14285714
|
|
0.3 0.44444444 0.44444444 0.33333333]
|
|
|
|
mean value: 0.30793650793650795
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.52
|
|
|
|
Accuracy on Blind test: 0.9
|
|
|
|
Model_name: Random Forest2
|
|
Model func: RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000...05', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10,
|
|
oob_score=True, random_state=42))])
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
|
|
key: fit_time
|
|
value: [1.9122591 1.05636215 1.13255811 1.03158069 1.03058505 1.03063107
|
|
1.02702713 1.04627752 0.99530816 1.01853037]
|
|
|
|
mean value: 1.1281119346618653
|
|
|
|
key: score_time
|
|
value: [0.27934575 0.18957329 0.23676729 0.23349524 0.23496032 0.26561189
|
|
0.27556443 0.21443081 0.28642106 0.23911071]
|
|
|
|
mean value: 0.24552807807922364
|
|
|
|
key: test_mcc
|
|
value: [0.51786274 0.51786274 0.29455849 0.41966582 0.11410535 0.07404322
|
|
0.33873645 0. 0.55005827 0.44570528]
|
|
|
|
mean value: 0.327259837492259
|
|
|
|
key: train_mcc
|
|
value: [0.77734037 0.74502206 0.76955687 0.76091787 0.75299773 0.77734037
|
|
0.7714083 0.74796699 0.74796699 0.75583353]
|
|
|
|
mean value: 0.7606351080339614
|
|
|
|
key: test_accuracy
|
|
value: [0.89855072 0.89855072 0.86956522 0.88405797 0.84057971 0.82608696
|
|
0.88235294 0.86764706 0.91176471 0.89705882]
|
|
|
|
mean value: 0.877621483375959
|
|
|
|
key: train_accuracy
|
|
value: [0.94975689 0.94327391 0.94813614 0.9465154 0.94489465 0.94975689
|
|
0.94822006 0.9433657 0.9433657 0.94498382]
|
|
|
|
mean value: 0.9462269148662754
|
|
|
|
key: test_fscore
|
|
value: [0.46153846 0.46153846 0.18181818 0.33333333 0.15384615 0.14285714
|
|
0.33333333 0. 0.5 0.36363636]
|
|
|
|
mean value: 0.2931901431901432
|
|
|
|
key: train_fscore
|
|
value: [0.78014184 0.74820144 0.77142857 0.76595745 0.75714286 0.78014184
|
|
0.77777778 0.75177305 0.75177305 0.76056338]
|
|
|
|
mean value: 0.764490125952237
|
|
|
|
key: test_precision
|
|
value: [1. 1. 1. 1. 0.33333333 0.25
|
|
0.66666667 0. 1. 1. ]
|
|
|
|
mean value: 0.725
|
|
|
|
key: train_precision
|
|
value: [1. 0.98113208 1. 0.98181818 0.98148148 1.
|
|
0.98245614 0.98148148 0.98148148 0.98181818]
|
|
|
|
mean value: 0.9871669023903383
|
|
|
|
key: test_recall
|
|
value: [0.3 0.3 0.1 0.2 0.1 0.1
|
|
0.22222222 0. 0.33333333 0.22222222]
|
|
|
|
mean value: 0.18777777777777777
|
|
|
|
key: train_recall
|
|
value: [0.63953488 0.60465116 0.62790698 0.62790698 0.61627907 0.63953488
|
|
0.64367816 0.6091954 0.6091954 0.62068966]
|
|
|
|
mean value: 0.6238572574178027
|
|
|
|
key: test_roc_auc
|
|
value: [0.65 0.65 0.55 0.6 0.53305085 0.52457627
|
|
0.60263653 0.5 0.66666667 0.61111111]
|
|
|
|
mean value: 0.588804143126177
|
|
|
|
key: train_roc_auc
|
|
value: [0.81976744 0.80138396 0.81395349 0.81301187 0.80719792 0.81976744
|
|
0.82089746 0.80365608 0.80365608 0.80940321]
|
|
|
|
mean value: 0.8112694949989202
|
|
|
|
key: test_jcc
|
|
value: [0.3 0.3 0.1 0.2 0.08333333 0.07692308
|
|
0.2 0. 0.33333333 0.22222222]
|
|
|
|
mean value: 0.1815811965811966
|
|
|
|
key: train_jcc
|
|
value: [0.63953488 0.59770115 0.62790698 0.62068966 0.6091954 0.63953488
|
|
0.63636364 0.60227273 0.60227273 0.61363636]
|
|
|
|
mean value: 0.6189108405628053
|
|
|
|
MCC on Blind test: 0.51
|
|
|
|
Accuracy on Blind test: 0.9
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01200724 0.0116694 0.01067233 0.01104212 0.01295972 0.01189327
|
|
0.01120949 0.01093292 0.01097274 0.01070261]
|
|
|
|
mean value: 0.011406183242797852
|
|
|
|
key: score_time
|
|
value: [0.00937772 0.008991 0.00931144 0.00978422 0.01036787 0.00992846
|
|
0.00930786 0.00912571 0.00901794 0.00904083]
|
|
|
|
mean value: 0.00942530632019043
|
|
|
|
key: test_mcc
|
|
value: [ 0.07404322 0.40709555 0.04372729 -0.12705137 0.04372729 0.40709555
|
|
0.22250453 0.15330689 0.2712269 0.38877165]
|
|
|
|
mean value: 0.18844474963906296
|
|
|
|
key: train_mcc
|
|
value: [0.36745343 0.29282032 0.26746977 0.35557854 0.289342 0.31988206
|
|
0.24930053 0.35763321 0.29630871 0.29597565]
|
|
|
|
mean value: 0.30917642231712034
|
|
|
|
key: test_accuracy
|
|
value: [0.82608696 0.86956522 0.8115942 0.76811594 0.8115942 0.86956522
|
|
0.85294118 0.82352941 0.86764706 0.88235294]
|
|
|
|
mean value: 0.8382992327365729
|
|
|
|
key: train_accuracy
|
|
value: [0.86709887 0.84764992 0.8541329 0.86709887 0.8541329 0.86385737
|
|
0.84466019 0.86893204 0.85598706 0.85113269]
|
|
|
|
mean value: 0.857468280069026
|
|
|
|
key: test_fscore
|
|
value: [0.14285714 0.47058824 0.13333333 0. 0.13333333 0.47058824
|
|
0.28571429 0.25 0.30769231 0.42857143]
|
|
|
|
mean value: 0.2622678302090067
|
|
|
|
key: train_fscore
|
|
value: [0.43055556 0.37333333 0.32835821 0.41428571 0.35714286 0.37313433
|
|
0.32394366 0.40875912 0.35971223 0.36986301]
|
|
|
|
mean value: 0.3739088027604773
|
|
|
|
key: test_precision
|
|
value: [0.25 0.57142857 0.2 0. 0.2 0.57142857
|
|
0.4 0.28571429 0.5 0.6 ]
|
|
|
|
mean value: 0.3578571428571429
|
|
|
|
key: train_precision
|
|
value: [0.53448276 0.4375 0.45833333 0.53703704 0.46296296 0.52083333
|
|
0.41818182 0.56 0.48076923 0.45762712]
|
|
|
|
mean value: 0.4867727592882473
|
|
|
|
key: test_recall
|
|
value: [0.1 0.4 0.1 0. 0.1 0.4
|
|
0.22222222 0.22222222 0.22222222 0.33333333]
|
|
|
|
mean value: 0.21
|
|
|
|
key: train_recall
|
|
value: [0.36046512 0.3255814 0.25581395 0.3372093 0.29069767 0.29069767
|
|
0.26436782 0.32183908 0.28735632 0.31034483]
|
|
|
|
mean value: 0.3044373162256081
|
|
|
|
key: test_roc_auc
|
|
value: [0.52457627 0.67457627 0.51610169 0.44915254 0.51610169 0.67457627
|
|
0.58568738 0.56873823 0.59416196 0.64971751]
|
|
|
|
mean value: 0.5753389830508474
|
|
|
|
key: train_roc_auc
|
|
value: [0.65480883 0.62889239 0.60342487 0.64506416 0.61804187 0.62369159
|
|
0.60205208 0.64020391 0.61825443 0.62504059]
|
|
|
|
mean value: 0.625947471672126
|
|
|
|
key: test_jcc
|
|
value: [0.07692308 0.30769231 0.07142857 0. 0.07142857 0.30769231
|
|
0.16666667 0.14285714 0.18181818 0.27272727]
|
|
|
|
mean value: 0.15992340992340992
|
|
|
|
key: train_jcc
|
|
value: [0.27433628 0.2295082 0.19642857 0.26126126 0.2173913 0.2293578
|
|
0.19327731 0.25688073 0.21929825 0.22689076]
|
|
|
|
mean value: 0.23046304618958285
|
|
|
|
MCC on Blind test: 0.15
|
|
|
|
Accuracy on Blind test: 0.84
|
|
|
|
Model_name: XGBoost
|
|
Model func: XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000...
|
|
interaction_constraints=None, learning_rate=None,
|
|
max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan,
|
|
monotone_constraints=None, n_estimators=100,
|
|
n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None,
|
|
reg_lambda=None, scale_pos_weight=None,
|
|
subsample=None, tree_method=None,
|
|
use_label_encoder=False,
|
|
validate_parameters=None, verbosity=0))])
|
|
|
|
key: fit_time
|
|
value: [0.12750459 0.09174705 0.39032292 0.17599106 0.11976123 0.37831283
|
|
0.21472764 0.33090925 0.19453621 0.10146976]
|
|
|
|
mean value: 0.21252825260162353
|
|
|
|
key: score_time
|
|
value: [0.01121521 0.01107216 0.01113439 0.01633215 0.01168418 0.01697397
|
|
0.01266217 0.01118088 0.01209474 0.01144695]
|
|
|
|
mean value: 0.012579679489135742
|
|
|
|
key: test_mcc
|
|
value: [0.62245871 0.49386694 0.87964225 0.603494 0.58151047 0.50888863
|
|
0.72453254 0.66553576 0.72130589 0.86733896]
|
|
|
|
mean value: 0.6668574161843153
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.91304348 0.88405797 0.97101449 0.91304348 0.86956522 0.85507246
|
|
0.94117647 0.92647059 0.94117647 0.97058824]
|
|
|
|
mean value: 0.9185208866155158
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.66666667 0.55555556 0.88888889 0.625 0.64 0.58333333
|
|
0.75 0.70588235 0.71428571 0.875 ]
|
|
|
|
mean value: 0.7004612511671335
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.75 0.625 1. 0.83333333 0.53333333 0.5
|
|
0.85714286 0.75 1. 1. ]
|
|
|
|
mean value: 0.7848809523809523
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.6 0.5 0.8 0.5 0.8 0.7
|
|
0.66666667 0.66666667 0.55555556 0.77777778]
|
|
|
|
mean value: 0.6566666666666666
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.78305085 0.72457627 0.9 0.74152542 0.84067797 0.79067797
|
|
0.82485876 0.81638418 0.77777778 0.88888889]
|
|
|
|
mean value: 0.8088418079096045
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.5 0.38461538 0.8 0.45454545 0.47058824 0.41176471
|
|
0.6 0.54545455 0.55555556 0.77777778]
|
|
|
|
mean value: 0.5500301659125189
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.8
|
|
|
|
Accuracy on Blind test: 0.95
|
|
|
|
Model_name: LDA
|
|
Model func: LinearDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LinearDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.05544567 0.08118701 0.09360313 0.09469151 0.0654223 0.09144473
|
|
0.08024359 0.07798028 0.1002059 0.07247376]
|
|
|
|
mean value: 0.08126978874206543
|
|
|
|
key: score_time
|
|
value: [0.01239872 0.02466393 0.02437568 0.0123632 0.01316738 0.015975
|
|
0.02184534 0.01921368 0.01916623 0.03731513]
|
|
|
|
mean value: 0.02004842758178711
|
|
|
|
key: test_mcc
|
|
value: [0.49386694 0.54345279 0.62245871 0.68003253 0.49549267 0.15810128
|
|
0.48775895 0.41767262 0.55503877 0.64349815]
|
|
|
|
mean value: 0.509737342438266
|
|
|
|
key: train_mcc
|
|
value: [0.79544964 0.82409065 0.76406794 0.76406794 0.7876373 0.85335366
|
|
0.78027199 0.7834439 0.77707579 0.80021405]
|
|
|
|
mean value: 0.7929672868917641
|
|
|
|
key: test_accuracy
|
|
value: [0.88405797 0.89855072 0.91304348 0.91304348 0.86956522 0.7826087
|
|
0.88235294 0.85294118 0.91176471 0.92647059]
|
|
|
|
mean value: 0.8834398976982097
|
|
|
|
key: train_accuracy
|
|
value: [0.95299838 0.95948136 0.9465154 0.9465154 0.94975689 0.96596434
|
|
0.94822006 0.94983819 0.94822006 0.95307443]
|
|
|
|
mean value: 0.9520584517421703
|
|
|
|
key: test_fscore
|
|
value: [0.55555556 0.58823529 0.66666667 0.72727273 0.57142857 0.28571429
|
|
0.55555556 0.5 0.57142857 0.66666667]
|
|
|
|
mean value: 0.5688523894406247
|
|
|
|
key: train_fscore
|
|
value: [0.81987578 0.8447205 0.78980892 0.78980892 0.81656805 0.87116564
|
|
0.80952381 0.80981595 0.80487805 0.82634731]
|
|
|
|
mean value: 0.8182512913809652
|
|
|
|
key: test_precision
|
|
value: [0.625 0.71428571 0.75 0.66666667 0.54545455 0.27272727
|
|
0.55555556 0.45454545 0.8 0.83333333]
|
|
|
|
mean value: 0.6217568542568542
|
|
|
|
key: train_precision
|
|
value: [0.88 0.90666667 0.87323944 0.87323944 0.8313253 0.92207792
|
|
0.83950617 0.86842105 0.85714286 0.8625 ]
|
|
|
|
mean value: 0.8714118845802786
|
|
|
|
key: test_recall
|
|
value: [0.5 0.5 0.6 0.8 0.6 0.3
|
|
0.55555556 0.55555556 0.44444444 0.55555556]
|
|
|
|
mean value: 0.5411111111111111
|
|
|
|
key: train_recall
|
|
value: [0.76744186 0.79069767 0.72093023 0.72093023 0.80232558 0.8255814
|
|
0.7816092 0.75862069 0.75862069 0.79310345]
|
|
|
|
mean value: 0.7719860999732692
|
|
|
|
key: test_roc_auc
|
|
value: [0.72457627 0.73305085 0.78305085 0.86610169 0.75762712 0.58220339
|
|
0.74387947 0.72693032 0.71374765 0.7693032 ]
|
|
|
|
mean value: 0.7400470809792844
|
|
|
|
key: train_roc_auc
|
|
value: [0.87524635 0.8887575 0.85199054 0.85199054 0.88798012 0.90714098
|
|
0.87856354 0.86989415 0.86895253 0.88619391]
|
|
|
|
mean value: 0.8766710160883295
|
|
|
|
key: test_jcc
|
|
value: [0.38461538 0.41666667 0.5 0.57142857 0.4 0.16666667
|
|
0.38461538 0.33333333 0.4 0.5 ]
|
|
|
|
mean value: 0.40573260073260076
|
|
|
|
key: train_jcc
|
|
value: [0.69473684 0.7311828 0.65263158 0.65263158 0.69 0.77173913
|
|
0.68 0.68041237 0.67346939 0.70408163]
|
|
|
|
mean value: 0.6930885317675891
|
|
|
|
MCC on Blind test: 0.65
|
|
|
|
Accuracy on Blind test: 0.92
|
|
|
|
Model_name: Multinomial
|
|
Model func: MultinomialNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MultinomialNB())])
|
|
|
|
key: fit_time
|
|
value: [0.02182937 0.01169372 0.01117873 0.0118649 0.01282048 0.01190662
|
|
0.01106 0.01060486 0.01061296 0.01032686]
|
|
|
|
mean value: 0.012389850616455079
|
|
|
|
key: score_time
|
|
value: [0.01112247 0.0092876 0.00957966 0.01107502 0.00969148 0.00948739
|
|
0.00950313 0.0088563 0.0093708 0.00893807]
|
|
|
|
mean value: 0.009691190719604493
|
|
|
|
key: test_mcc
|
|
value: [0.20252642 0.51786274 0.52012466 0.54345279 0.32952628 0.31127585
|
|
0.72130589 0.31280044 0.55503877 0.2712269 ]
|
|
|
|
mean value: 0.4285140732971342
|
|
|
|
key: train_mcc
|
|
value: [0.46807884 0.4486141 0.43128313 0.43399493 0.48067059 0.48067059
|
|
0.4524369 0.45617593 0.45616802 0.45570181]
|
|
|
|
mean value: 0.45637948293757175
|
|
|
|
key: test_accuracy
|
|
value: [0.84057971 0.89855072 0.89855072 0.89855072 0.84057971 0.85507246
|
|
0.94117647 0.88235294 0.91176471 0.86764706]
|
|
|
|
mean value: 0.8834825234441602
|
|
|
|
key: train_accuracy
|
|
value: [0.88654781 0.8897893 0.88492707 0.88654781 0.89303079 0.89303079
|
|
0.88834951 0.88834951 0.88996764 0.88511327]
|
|
|
|
mean value: 0.8885653517122731
|
|
|
|
key: test_fscore
|
|
value: [0.26666667 0.46153846 0.53333333 0.58823529 0.42105263 0.375
|
|
0.71428571 0.2 0.57142857 0.30769231]
|
|
|
|
mean value: 0.4439232980641649
|
|
|
|
key: train_fscore
|
|
value: [0.52054795 0.46875 0.46616541 0.46153846 0.51470588 0.51470588
|
|
0.48120301 0.48888889 0.47692308 0.5034965 ]
|
|
|
|
mean value: 0.4896925061810924
|
|
|
|
key: test_precision
|
|
value: [0.4 1. 0.8 0.71428571 0.44444444 0.5
|
|
1. 1. 0.8 0.5 ]
|
|
|
|
mean value: 0.7158730158730159
|
|
|
|
key: train_precision
|
|
value: [0.63333333 0.71428571 0.65957447 0.68181818 0.7 0.7
|
|
0.69565217 0.6875 0.72093023 0.64285714]
|
|
|
|
mean value: 0.6835951246850661
|
|
|
|
key: test_recall
|
|
value: [0.2 0.3 0.4 0.5 0.4 0.3
|
|
0.55555556 0.11111111 0.44444444 0.22222222]
|
|
|
|
mean value: 0.3433333333333333
|
|
|
|
key: train_recall
|
|
value: [0.44186047 0.34883721 0.36046512 0.34883721 0.40697674 0.40697674
|
|
0.36781609 0.37931034 0.35632184 0.4137931 ]
|
|
|
|
mean value: 0.3831194867682438
|
|
|
|
key: test_roc_auc
|
|
value: [0.57457627 0.65 0.69152542 0.73305085 0.65762712 0.62457627
|
|
0.77777778 0.55555556 0.71374765 0.59416196]
|
|
|
|
mean value: 0.6572598870056497
|
|
|
|
key: train_roc_auc
|
|
value: [0.7002146 0.66311917 0.66516664 0.66123593 0.68936408 0.68936408
|
|
0.67072537 0.67553088 0.66686148 0.68806416]
|
|
|
|
mean value: 0.6769646398059674
|
|
|
|
key: test_jcc
|
|
value: [0.15384615 0.3 0.36363636 0.41666667 0.26666667 0.23076923
|
|
0.55555556 0.11111111 0.4 0.18181818]
|
|
|
|
mean value: 0.298006993006993
|
|
|
|
key: train_jcc
|
|
value: [0.35185185 0.30612245 0.30392157 0.3 0.34653465 0.34653465
|
|
0.31683168 0.32352941 0.31313131 0.3364486 ]
|
|
|
|
mean value: 0.32449061825847647
|
|
|
|
MCC on Blind test: 0.51
|
|
|
|
Accuracy on Blind test: 0.9
|
|
|
|
Model_name: Passive Aggresive
|
|
Model func: PassiveAggressiveClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
PassiveAggressiveClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01529241 0.02115893 0.02641344 0.02176976 0.02065039 0.02290654
|
|
0.01941514 0.02138352 0.02597094 0.0217495 ]
|
|
|
|
mean value: 0.021671056747436523
|
|
|
|
key: score_time
|
|
value: [0.01033115 0.01156545 0.01200056 0.01208639 0.01204586 0.01208925
|
|
0.01208282 0.01201487 0.01209807 0.01203823]
|
|
|
|
mean value: 0.011835265159606933
|
|
|
|
key: test_mcc
|
|
value: [0.45738492 0.60255865 0.64915254 0. 0.2707383 0.20728266
|
|
0.67648143 0.31280044 0.44508294 0.72130589]
|
|
|
|
mean value: 0.43427877670594145
|
|
|
|
key: train_mcc
|
|
value: [0.71183823 0.57956419 0.7118833 0.14170184 0.56889551 0.72715685
|
|
0.54484148 0.36214427 0.68803552 0.72680559]
|
|
|
|
mean value: 0.5762866787597557
|
|
|
|
key: test_accuracy
|
|
value: [0.88405797 0.91304348 0.91304348 0.85507246 0.84057971 0.8115942
|
|
0.88235294 0.88235294 0.83823529 0.94117647]
|
|
|
|
mean value: 0.8761508951406649
|
|
|
|
key: train_accuracy
|
|
value: [0.93517018 0.91247974 0.92544571 0.86385737 0.910859 0.93354943
|
|
0.82038835 0.8802589 0.90614887 0.93851133]
|
|
|
|
mean value: 0.9026668869621773
|
|
|
|
key: test_fscore
|
|
value: [0.5 0.57142857 0.7 0. 0.35294118 0.31578947
|
|
0.69230769 0.2 0.52173913 0.71428571]
|
|
|
|
mean value: 0.4568491758611559
|
|
|
|
key: train_fscore
|
|
value: [0.74358974 0.55 0.75268817 0.04545455 0.54545455 0.76571429
|
|
0.58736059 0.26 0.72641509 0.75324675]
|
|
|
|
mean value: 0.5729923734638046
|
|
|
|
key: test_precision
|
|
value: [0.66666667 1. 0.7 0. 0.42857143 0.33333333
|
|
0.52941176 1. 0.42857143 1. ]
|
|
|
|
mean value: 0.608655462184874
|
|
|
|
key: train_precision
|
|
value: [0.82857143 0.97058824 0.7 1. 0.94285714 0.75280899
|
|
0.43406593 1. 0.616 0.86567164]
|
|
|
|
mean value: 0.8110563371343713
|
|
|
|
key: test_recall
|
|
value: [0.4 0.4 0.7 0. 0.3 0.3
|
|
1. 0.11111111 0.66666667 0.55555556]
|
|
|
|
mean value: 0.44333333333333336
|
|
|
|
key: train_recall
|
|
value: [0.6744186 0.38372093 0.81395349 0.02325581 0.38372093 0.77906977
|
|
0.90804598 0.14942529 0.88505747 0.66666667]
|
|
|
|
mean value: 0.5667334937182572
|
|
|
|
key: test_roc_auc
|
|
value: [0.68305085 0.7 0.82457627 0.5 0.61610169 0.59915254
|
|
0.93220339 0.55555556 0.76553672 0.77777778]
|
|
|
|
mean value: 0.6953954802259887
|
|
|
|
key: train_roc_auc
|
|
value: [0.82590987 0.69091885 0.87872816 0.51162791 0.68997723 0.86881925
|
|
0.85703617 0.57471264 0.897331 0.82485876]
|
|
|
|
mean value: 0.761991982264025
|
|
|
|
key: test_jcc
|
|
value: [0.33333333 0.4 0.53846154 0. 0.21428571 0.1875
|
|
0.52941176 0.11111111 0.35294118 0.55555556]
|
|
|
|
mean value: 0.3222600193923723
|
|
|
|
key: train_jcc
|
|
value: [0.59183673 0.37931034 0.60344828 0.02325581 0.375 0.62037037
|
|
0.41578947 0.14942529 0.57037037 0.60416667]
|
|
|
|
mean value: 0.4332973337784961
|
|
|
|
MCC on Blind test: 0.36
|
|
|
|
Accuracy on Blind test: 0.65
|
|
|
|
Model_name: Stochastic GDescent
|
|
Model func: SGDClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SGDClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02343059 0.02685213 0.02398324 0.02492046 0.02352524 0.02817893
|
|
0.02620912 0.02336764 0.02135277 0.02217007]
|
|
|
|
mean value: 0.02439901828765869
|
|
|
|
key: score_time
|
|
value: [0.01477814 0.01542377 0.01348925 0.01207185 0.01205969 0.01209307
|
|
0.01214099 0.01210618 0.01211667 0.01212096]
|
|
|
|
mean value: 0.012840056419372558
|
|
|
|
key: test_mcc
|
|
value: [0.4517699 0.41966582 0.45581115 0.74918625 0.42506154 0.04372729
|
|
0.64349815 0.44570528 0.72453254 0.49036845]
|
|
|
|
mean value: 0.4849326376862214
|
|
|
|
key: train_mcc
|
|
value: [0.76959666 0.49789179 0.37708499 0.71805606 0.69153276 0.72889622
|
|
0.56607423 0.56607423 0.77249518 0.56628359]
|
|
|
|
mean value: 0.6253985719938526
|
|
|
|
key: test_accuracy
|
|
value: [0.86956522 0.88405797 0.69565217 0.92753623 0.76811594 0.8115942
|
|
0.92647059 0.89705882 0.94117647 0.79411765]
|
|
|
|
mean value: 0.8515345268542199
|
|
|
|
key: train_accuracy
|
|
value: [0.94327391 0.89951378 0.61588331 0.9286872 0.90275527 0.94003241
|
|
0.90938511 0.90938511 0.94660194 0.83495146]
|
|
|
|
mean value: 0.8830469491694335
|
|
|
|
key: test_fscore
|
|
value: [0.52631579 0.33333333 0.48780488 0.7826087 0.5 0.13333333
|
|
0.66666667 0.36363636 0.75 0.53333333]
|
|
|
|
mean value: 0.5077032393477668
|
|
|
|
key: train_fscore
|
|
value: [0.80225989 0.44642857 0.41769042 0.75824176 0.72477064 0.72992701
|
|
0.53333333 0.53333333 0.80239521 0.60769231]
|
|
|
|
mean value: 0.6356072467807314
|
|
|
|
key: test_precision
|
|
value: [0.55555556 1. 0.32258065 0.69230769 0.36363636 0.2
|
|
0.83333333 1. 0.85714286 0.38095238]
|
|
|
|
mean value: 0.6205508828089473
|
|
|
|
key: train_precision
|
|
value: [0.78021978 0.96153846 0.26479751 0.71875 0.59848485 0.98039216
|
|
0.96969697 0.96969697 0.8375 0.4566474 ]
|
|
|
|
mean value: 0.7537724093131868
|
|
|
|
key: test_recall
|
|
value: [0.5 0.2 1. 0.9 0.8 0.1
|
|
0.55555556 0.22222222 0.66666667 0.88888889]
|
|
|
|
mean value: 0.5833333333333334
|
|
|
|
key: train_recall
|
|
value: [0.8255814 0.29069767 0.98837209 0.80232558 0.91860465 0.58139535
|
|
0.36781609 0.36781609 0.77011494 0.90804598]
|
|
|
|
mean value: 0.6820769847634323
|
|
|
|
key: test_roc_auc
|
|
value: [0.71610169 0.6 0.8220339 0.91610169 0.78135593 0.51610169
|
|
0.7693032 0.61111111 0.82485876 0.83427495]
|
|
|
|
mean value: 0.7391242937853107
|
|
|
|
key: train_roc_auc
|
|
value: [0.89395831 0.64440722 0.77196382 0.87573906 0.90939649 0.78975605
|
|
0.68296643 0.68296643 0.87281642 0.86551075]
|
|
|
|
mean value: 0.7989480969014902
|
|
|
|
key: test_jcc
|
|
value: [0.35714286 0.2 0.32258065 0.64285714 0.33333333 0.07142857
|
|
0.5 0.22222222 0.6 0.36363636]
|
|
|
|
mean value: 0.3613201135781781
|
|
|
|
key: train_jcc
|
|
value: [0.66981132 0.28735632 0.26397516 0.61061947 0.56834532 0.57471264
|
|
0.36363636 0.36363636 0.67 0.43646409]
|
|
|
|
mean value: 0.48085570499895347
|
|
|
|
MCC on Blind test: 0.66
|
|
|
|
Accuracy on Blind test: 0.92
|
|
|
|
Model_name: AdaBoost Classifier
|
|
Model func: AdaBoostClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', AdaBoostClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.22201872 0.20744562 0.20480037 0.20456576 0.20357203 0.20545721
|
|
0.20451236 0.2157166 0.20554924 0.21050262]
|
|
|
|
mean value: 0.20841405391693116
|
|
|
|
key: score_time
|
|
value: [0.01546049 0.015517 0.01565623 0.01554155 0.01551032 0.01557827
|
|
0.01543522 0.01714277 0.01562762 0.01682186]
|
|
|
|
mean value: 0.015829133987426757
|
|
|
|
key: test_mcc
|
|
value: [0.45738492 0.45738492 0.62245871 0.81616728 0.57141623 0.49549267
|
|
0.530844 0.6955463 0.61581921 0.72453254]
|
|
|
|
mean value: 0.5987046801214446
|
|
|
|
key: train_mcc
|
|
value: [0.92534618 0.93849646 0.88363859 0.93145528 0.95908232 0.95210096
|
|
0.91850378 0.89723008 0.92493049 0.91850378]
|
|
|
|
mean value: 0.9249287909835626
|
|
|
|
key: test_accuracy
|
|
value: [0.88405797 0.88405797 0.91304348 0.95652174 0.88405797 0.86956522
|
|
0.89705882 0.92647059 0.91176471 0.94117647]
|
|
|
|
mean value: 0.9067774936061381
|
|
|
|
key: train_accuracy
|
|
value: [0.9821718 0.98541329 0.97244733 0.98379254 0.99027553 0.98865478
|
|
0.98058252 0.97572816 0.98220065 0.98058252]
|
|
|
|
mean value: 0.9821849118555701
|
|
|
|
key: test_fscore
|
|
value: [0.5 0.5 0.66666667 0.82352941 0.63636364 0.57142857
|
|
0.58823529 0.73684211 0.66666667 0.75 ]
|
|
|
|
mean value: 0.6439732352271051
|
|
|
|
key: train_fscore
|
|
value: [0.93567251 0.94674556 0.89940828 0.94047619 0.96385542 0.95808383
|
|
0.92941176 0.91017964 0.93333333 0.92941176]
|
|
|
|
mean value: 0.9346578308735657
|
|
|
|
key: test_precision
|
|
value: [0.66666667 0.66666667 0.75 1. 0.58333333 0.54545455
|
|
0.625 0.7 0.66666667 0.85714286]
|
|
|
|
mean value: 0.7060930735930736
|
|
|
|
key: train_precision
|
|
value: [0.94117647 0.96385542 0.91566265 0.96341463 1. 0.98765432
|
|
0.95180723 0.95 0.98717949 0.95180723]
|
|
|
|
mean value: 0.96125574430222
|
|
|
|
key: test_recall
|
|
value: [0.4 0.4 0.6 0.7 0.7 0.6
|
|
0.55555556 0.77777778 0.66666667 0.66666667]
|
|
|
|
mean value: 0.6066666666666667
|
|
|
|
key: train_recall
|
|
value: [0.93023256 0.93023256 0.88372093 0.91860465 0.93023256 0.93023256
|
|
0.90804598 0.87356322 0.88505747 0.90804598]
|
|
|
|
mean value: 0.9097968457631649
|
|
|
|
key: test_roc_auc
|
|
value: [0.68305085 0.68305085 0.78305085 0.85 0.80762712 0.75762712
|
|
0.75235405 0.86346516 0.8079096 0.82485876]
|
|
|
|
mean value: 0.7812994350282486
|
|
|
|
key: train_roc_auc
|
|
value: [0.96040818 0.96229142 0.93526913 0.95647747 0.96511628 0.96417466
|
|
0.95025651 0.93301513 0.94158712 0.95025651]
|
|
|
|
mean value: 0.9518852402073829
|
|
|
|
key: test_jcc
|
|
value: [0.33333333 0.33333333 0.5 0.7 0.46666667 0.4
|
|
0.41666667 0.58333333 0.5 0.6 ]
|
|
|
|
mean value: 0.48333333333333334
|
|
|
|
key: train_jcc
|
|
value: [0.87912088 0.8988764 0.8172043 0.88764045 0.93023256 0.91954023
|
|
0.86813187 0.83516484 0.875 0.86813187]
|
|
|
|
mean value: 0.8779043393581896
|
|
|
|
MCC on Blind test: 0.77
|
|
|
|
Accuracy on Blind test: 0.95
|
|
|
|
Model_name: Bagging Classifier
|
|
Model func: BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
BaggingClassifier(n_jobs=10, oob_score=True,
|
|
random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.10963535 0.11217332 0.11714625 0.11565113 0.09540892 0.11659002
|
|
0.09870648 0.11387324 0.12206197 0.12197232]
|
|
|
|
mean value: 0.11232190132141114
|
|
|
|
key: score_time
|
|
value: [0.03330135 0.03456187 0.03123403 0.02251244 0.0219481 0.02423906
|
|
0.02876949 0.0243938 0.0360713 0.03810143]
|
|
|
|
mean value: 0.029513287544250488
|
|
|
|
key: test_mcc
|
|
value: [0.62245871 0.49386694 0.81850077 0.603494 0.72042027 0.40651745
|
|
0.64349815 0.55503877 0.530844 0.74387947]
|
|
|
|
mean value: 0.613851853347062
|
|
|
|
key: train_mcc
|
|
value: [0.95908232 0.97965663 0.97963987 0.97965663 0.95946656 0.94524022
|
|
0.96626736 0.97984335 0.96630041 0.95943803]
|
|
|
|
mean value: 0.9674591383436767
|
|
|
|
key: test_accuracy
|
|
value: [0.91304348 0.88405797 0.95652174 0.91304348 0.92753623 0.82608696
|
|
0.92647059 0.91176471 0.89705882 0.94117647]
|
|
|
|
mean value: 0.9096760443307758
|
|
|
|
key: train_accuracy
|
|
value: [0.99027553 0.99513776 0.99513776 0.99513776 0.99027553 0.98703404
|
|
0.99190939 0.99514563 0.99190939 0.99029126]
|
|
|
|
mean value: 0.9922254042684877
|
|
|
|
key: test_fscore
|
|
value: [0.66666667 0.55555556 0.84210526 0.625 0.76190476 0.5
|
|
0.66666667 0.57142857 0.58823529 0.77777778]
|
|
|
|
mean value: 0.6555340557275542
|
|
|
|
key: train_fscore
|
|
value: [0.96385542 0.98245614 0.98224852 0.98245614 0.96511628 0.95238095
|
|
0.97076023 0.98245614 0.9704142 0.96470588]
|
|
|
|
mean value: 0.9716849912354659
|
|
|
|
key: test_precision
|
|
value: [0.75 0.625 0.88888889 0.83333333 0.72727273 0.42857143
|
|
0.83333333 0.8 0.625 0.77777778]
|
|
|
|
mean value: 0.728917748917749
|
|
|
|
key: train_precision
|
|
value: [1. 0.98823529 1. 0.98823529 0.96511628 0.97560976
|
|
0.98809524 1. 1. 0.98795181]
|
|
|
|
mean value: 0.9893243668726777
|
|
|
|
key: test_recall
|
|
value: [0.6 0.5 0.8 0.5 0.8 0.6
|
|
0.55555556 0.44444444 0.55555556 0.77777778]
|
|
|
|
mean value: 0.6133333333333333
|
|
|
|
key: train_recall
|
|
value: [0.93023256 0.97674419 0.96511628 0.97674419 0.96511628 0.93023256
|
|
0.95402299 0.96551724 0.94252874 0.94252874]
|
|
|
|
mean value: 0.9548783747661053
|
|
|
|
key: test_roc_auc
|
|
value: [0.78305085 0.72457627 0.89152542 0.74152542 0.87457627 0.73220339
|
|
0.7693032 0.71374765 0.75235405 0.87193974]
|
|
|
|
mean value: 0.7854802259887006
|
|
|
|
key: train_roc_auc
|
|
value: [0.96511628 0.98743047 0.98255814 0.98743047 0.97973328 0.96323304
|
|
0.97606987 0.98275862 0.97126437 0.97032275]
|
|
|
|
mean value: 0.976591729755934
|
|
|
|
key: test_jcc
|
|
value: [0.5 0.38461538 0.72727273 0.45454545 0.61538462 0.33333333
|
|
0.5 0.4 0.41666667 0.63636364]
|
|
|
|
mean value: 0.4968181818181818
|
|
|
|
key: train_jcc
|
|
value: [0.93023256 0.96551724 0.96511628 0.96551724 0.93258427 0.90909091
|
|
0.94318182 0.96551724 0.94252874 0.93181818]
|
|
|
|
mean value: 0.9451104475733247
|
|
|
|
MCC on Blind test: 0.69
|
|
|
|
Accuracy on Blind test: 0.93
|
|
|
|
Model_name: Gaussian Process
|
|
Model func: GaussianProcessClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianProcessClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.33492136 0.28897905 0.28859377 0.20644855 0.2192409 0.24099922
|
|
0.32627773 0.25938916 0.26763558 0.32626319]
|
|
|
|
mean value: 0.2758748531341553
|
|
|
|
key: score_time
|
|
value: [0.03320241 0.0170958 0.01673841 0.02886033 0.01673412 0.03081465
|
|
0.03136921 0.02821088 0.028126 0.02861452]
|
|
|
|
mean value: 0.02597663402557373
|
|
|
|
key: test_mcc
|
|
value: [ 0. 0.17426801 0.29455849 -0.04992517 0.25021503 -0.04992517
|
|
0. 0.31280044 0.31280044 0.31280044]
|
|
|
|
mean value: 0.15575925071443147
|
|
|
|
key: train_mcc
|
|
value: [0.80039825 0.78507404 0.79275949 0.78507404 0.81554149 0.80799178
|
|
0.79507412 0.79507412 0.7721733 0.79507412]
|
|
|
|
mean value: 0.7944234753033669
|
|
|
|
key: test_accuracy
|
|
value: [0.85507246 0.85507246 0.86956522 0.84057971 0.85507246 0.84057971
|
|
0.86764706 0.88235294 0.88235294 0.88235294]
|
|
|
|
mean value: 0.8630647911338448
|
|
|
|
key: train_accuracy
|
|
value: [0.95461912 0.95137763 0.95299838 0.95137763 0.95786062 0.95623987
|
|
0.95307443 0.95307443 0.94822006 0.95307443]
|
|
|
|
mean value: 0.9531916623394334
|
|
|
|
key: test_fscore
|
|
value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
[0. 0.16666667 0.18181818 0. 0.28571429 0.
|
|
0. 0.2 0.2 0.2 ]
|
|
|
|
mean value: 0.12341991341991342
|
|
|
|
key: train_fscore
|
|
value: [0.80555556 0.78873239 0.7972028 0.78873239 0.82191781 0.8137931
|
|
0.8 0.8 0.77464789 0.8 ]
|
|
|
|
mean value: 0.7990581940482145
|
|
|
|
key: test_precision
|
|
value: [0. 0.5 1. 0. 0.5 0. 0. 1. 1. 1. ]
|
|
|
|
mean value: 0.5
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0. 0.1 0.1 0. 0.2 0.
|
|
0. 0.11111111 0.11111111 0.11111111]
|
|
|
|
mean value: 0.07333333333333333
|
|
|
|
key: train_recall
|
|
value: [0.6744186 0.65116279 0.6627907 0.65116279 0.69767442 0.68604651
|
|
0.66666667 0.66666667 0.63218391 0.66666667]
|
|
|
|
mean value: 0.6655439721999465
|
|
|
|
key: test_roc_auc
|
|
value: [0.5 0.54152542 0.55 0.49152542 0.58305085 0.49152542
|
|
0.5 0.55555556 0.55555556 0.55555556]
|
|
|
|
mean value: 0.5324293785310734
|
|
|
|
key: train_roc_auc
|
|
value: [0.8372093 0.8255814 0.83139535 0.8255814 0.84883721 0.84302326
|
|
0.83333333 0.83333333 0.81609195 0.83333333]
|
|
|
|
mean value: 0.8327719860999733
|
|
|
|
key: test_jcc
|
|
value: [0. 0.09090909 0.1 0. 0.16666667 0.
|
|
0. 0.11111111 0.11111111 0.11111111]
|
|
|
|
mean value: 0.06909090909090909
|
|
|
|
key: train_jcc
|
|
value: [0.6744186 0.65116279 0.6627907 0.65116279 0.69767442 0.68604651
|
|
0.66666667 0.66666667 0.63218391 0.66666667]
|
|
|
|
mean value: 0.6655439721999465
|
|
|
|
MCC on Blind test: 0.0
|
|
|
|
Accuracy on Blind test: 0.86
|
|
|
|
Model_name: Gradient Boosting
|
|
Model func: GradientBoostingClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GradientBoostingClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.86672139 0.84664679 0.86712384 0.87101698 0.84984779 0.85930324
|
|
0.85190558 0.85681391 0.8511548 0.85800982]
|
|
|
|
mean value: 0.8578544139862061
|
|
|
|
key: score_time
|
|
value: [0.00956321 0.00940728 0.00999069 0.00948048 0.00941825 0.00948691
|
|
0.00951052 0.00937724 0.00970578 0.0100801 ]
|
|
|
|
mean value: 0.009602046012878418
|
|
|
|
key: test_mcc
|
|
value: [0.69625714 0.62245871 0.88305085 0.67981004 0.64390346 0.46279992
|
|
0.64349815 0.86733896 0.80022753 0.66553576]
|
|
|
|
mean value: 0.6964880511261671
|
|
|
|
key: train_mcc
|
|
value: [0.99323424 1. 0.99323424 0.99323424 0.99323424 0.99323424
|
|
0.99330139 0.98658269 0.99330139 0.98658269]
|
|
|
|
mean value: 0.9925939355977453
|
|
|
|
key: test_accuracy
|
|
value: [0.92753623 0.91304348 0.97101449 0.92753623 0.89855072 0.85507246
|
|
0.92647059 0.97058824 0.95588235 0.92647059]
|
|
|
|
mean value: 0.9272165387894288
|
|
|
|
key: train_accuracy
|
|
value: [0.99837925 1. 0.99837925 0.99837925 0.99837925 0.99837925
|
|
0.99838188 0.99676375 0.99838188 0.99676375]
|
|
|
|
mean value: 0.9982187534421173
|
|
|
|
key: test_fscore
|
|
value: [0.73684211 0.66666667 0.9 0.70588235 0.69565217 0.54545455
|
|
0.66666667 0.875 0.82352941 0.70588235]
|
|
|
|
mean value: 0.7321576275611139
|
|
|
|
key: train_fscore
|
|
value: [0.99415205 1. 0.99415205 0.99415205 0.99415205 0.99415205
|
|
0.99421965 0.98837209 0.99421965 0.98837209]
|
|
|
|
mean value: 0.9935943726323022
|
|
|
|
key: test_precision
|
|
value: [0.77777778 0.75 0.9 0.85714286 0.61538462 0.5
|
|
0.83333333 1. 0.875 0.75 ]
|
|
|
|
mean value: 0.7858638583638584
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.7 0.6 0.9 0.6 0.8 0.6
|
|
0.55555556 0.77777778 0.77777778 0.66666667]
|
|
|
|
mean value: 0.6977777777777778
|
|
|
|
key: train_recall
|
|
value: [0.98837209 1. 0.98837209 0.98837209 0.98837209 0.98837209
|
|
0.98850575 0.97701149 0.98850575 0.97701149]
|
|
|
|
mean value: 0.98728949478749
|
|
|
|
key: test_roc_auc
|
|
value: [0.83305085 0.78305085 0.94152542 0.79152542 0.85762712 0.74915254
|
|
0.7693032 0.88888889 0.88041431 0.81638418]
|
|
|
|
mean value: 0.8310922787193974
|
|
|
|
key: train_roc_auc
|
|
value: [0.99418605 1. 0.99418605 0.99418605 0.99418605 0.99418605
|
|
0.99425287 0.98850575 0.99425287 0.98850575]
|
|
|
|
mean value: 0.993644747393745
|
|
|
|
key: test_jcc
|
|
value: [0.58333333 0.5 0.81818182 0.54545455 0.53333333 0.375
|
|
0.5 0.77777778 0.7 0.54545455]
|
|
|
|
mean value: 0.5878535353535354
|
|
|
|
key: train_jcc
|
|
value: [0.98837209 1. 0.98837209 0.98837209 0.98837209 0.98837209
|
|
0.98850575 0.97701149 0.98850575 0.97701149]
|
|
|
|
mean value: 0.98728949478749
|
|
|
|
MCC on Blind test: 0.72
|
|
|
|
Accuracy on Blind test: 0.94
|
|
|
|
Model_name: QDA
|
|
Model func: QuadraticDiscriminantAnalysis()
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', QuadraticDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.03572869 0.04819536 0.03441334 0.03409314 0.04804707 0.06565928
|
|
0.03767586 0.03153682 0.03145766 0.03155422]
|
|
|
|
mean value: 0.03983614444732666
|
|
|
|
key: score_time
|
|
value: [0.01304913 0.01376319 0.01407003 0.0140295 0.02094889 0.01705551
|
|
0.02330947 0.0159955 0.01563549 0.02442741]
|
|
|
|
mean value: 0.017228412628173827
|
|
|
|
key: test_mcc
|
|
value: [ 0. -0.08777335 -0.08777335 -0.12705137 0.04372729 0.
|
|
-0.09764168 0.05623741 -0.08390719 -0.04771532]
|
|
|
|
mean value: -0.04318975547680215
|
|
|
|
key: train_mcc
|
|
value: [0.10011697 0.10011697 0.14170184 0.2007233 0. 0.10011697
|
|
0.09945922 0.17254831 0.17254831 0.09945922]
|
|
|
|
mean value: 0.1186791091098631
|
|
|
|
key: test_accuracy
|
|
value: [0.85507246 0.8115942 0.8115942 0.76811594 0.8115942 0.85507246
|
|
0.80882353 0.82352941 0.82352941 0.85294118]
|
|
|
|
mean value: 0.8221867007672634
|
|
|
|
key: train_accuracy
|
|
value: [0.86223663 0.86223663 0.86385737 0.86709887 0.86061588 0.86223663
|
|
0.86084142 0.86407767 0.86407767 0.86084142]
|
|
|
|
mean value: 0.8628120197426739
|
|
|
|
key: test_fscore
|
|
value: [0. 0. 0. 0. 0.13333333 0.
|
|
0. 0.14285714 0. 0. ]
|
|
|
|
mean value: 0.02761904761904762
|
|
|
|
key: train_fscore
|
|
value: [0.02298851 0.02298851 0.04545455 0.08888889 0. 0.02298851
|
|
0.02272727 0.06666667 0.06666667 0.02272727]
|
|
|
|
mean value: 0.038209683037269244
|
|
|
|
key: test_precision
|
|
value: [0. 0. 0. 0. 0.2 0. 0. 0.2 0. 0. ]
|
|
|
|
mean value: 0.04
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 0. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 0.9
|
|
|
|
key: test_recall
|
|
value: [0. 0. 0. 0. 0.1 0.
|
|
0. 0.11111111 0. 0. ]
|
|
|
|
mean value: 0.021111111111111112
|
|
|
|
key: train_recall
|
|
value: [0.01162791 0.01162791 0.02325581 0.04651163 0. 0.01162791
|
|
0.01149425 0.03448276 0.03448276 0.01149425]
|
|
|
|
mean value: 0.019660518577920342
|
|
|
|
key: test_roc_auc
|
|
value: [0.5 0.47457627 0.47457627 0.44915254 0.51610169 0.5
|
|
0.46610169 0.52165725 0.47457627 0.49152542]
|
|
|
|
mean value: 0.48682674199623355
|
|
|
|
key: train_roc_auc
|
|
value: [0.50581395 0.50581395 0.51162791 0.52325581 0.5 0.50581395
|
|
0.50574713 0.51724138 0.51724138 0.50574713]
|
|
|
|
mean value: 0.5098302592889602
|
|
|
|
key: test_jcc
|
|
value: [0. 0. 0. 0. 0.07142857 0.
|
|
0. 0.07692308 0. 0. ]
|
|
|
|
mean value: 0.014835164835164835
|
|
|
|
key: train_jcc
|
|
value: [0.01162791 0.01162791 0.02325581 0.04651163 0. 0.01162791
|
|
0.01149425 0.03448276 0.03448276 0.01149425]
|
|
|
|
mean value: 0.019660518577920342
|
|
|
|
MCC on Blind test: 0.01
|
|
|
|
Accuracy on Blind test: 0.84
|
|
|
|
Model_name: Ridge Classifier
|
|
Model func: RidgeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.039325 0.04072046 0.03789592 0.04087138 0.04203081 0.04368925
|
|
0.05152917 0.02732038 0.01859951 0.01669264]
|
|
|
|
mean value: 0.03586745262145996
|
|
|
|
key: score_time
|
|
value: [0.02119255 0.01899457 0.01899362 0.01898074 0.02511334 0.01914859
|
|
0.02012944 0.02779293 0.01213288 0.01207614]
|
|
|
|
mean value: 0.019455480575561523
|
|
|
|
key: test_mcc
|
|
value: [0.54345279 0.45738492 0.42638684 0.67892378 0.53220339 0.16516678
|
|
0.64349815 0.55503877 0.64009548 0.64009548]
|
|
|
|
mean value: 0.5282246379782266
|
|
|
|
key: train_mcc
|
|
value: [0.72975734 0.74465952 0.69579594 0.71195653 0.72156052 0.72811713
|
|
0.7247231 0.69934924 0.68339509 0.69934924]
|
|
|
|
mean value: 0.7138663660336864
|
|
|
|
key: test_accuracy
|
|
value: [0.89855072 0.88405797 0.88405797 0.92753623 0.88405797 0.82608696
|
|
0.92647059 0.91176471 0.92647059 0.92647059]
|
|
|
|
mean value: 0.8995524296675192
|
|
|
|
key: train_accuracy
|
|
value: [0.94003241 0.94327391 0.93354943 0.93679092 0.93841167 0.94003241
|
|
0.93851133 0.93365696 0.93042071 0.93365696]
|
|
|
|
mean value: 0.9368336716442962
|
|
|
|
key: test_fscore
|
|
value: [0.58823529 0.5 0.42857143 0.66666667 0.6 0.25
|
|
0.66666667 0.57142857 0.61538462 0.61538462]
|
|
|
|
mean value: 0.5502337858220211
|
|
|
|
key: train_fscore
|
|
value: [0.75167785 0.76190476 0.71328671 0.72727273 0.74324324 0.74125874
|
|
0.74666667 0.71724138 0.70344828 0.71724138]
|
|
|
|
mean value: 0.7323241740464606
|
|
|
|
key: test_precision
|
|
value: [0.71428571 0.66666667 0.75 1. 0.6 0.33333333
|
|
0.83333333 0.8 1. 1. ]
|
|
|
|
mean value: 0.7697619047619048
|
|
|
|
key: train_precision
|
|
value: [0.88888889 0.91803279 0.89473684 0.9122807 0.88709677 0.92982456
|
|
0.88888889 0.89655172 0.87931034 0.89655172]
|
|
|
|
mean value: 0.8992163237223179
|
|
|
|
key: test_recall
|
|
value: [0.5 0.4 0.3 0.5 0.6 0.2
|
|
0.55555556 0.44444444 0.44444444 0.44444444]
|
|
|
|
mean value: 0.4388888888888889
|
|
|
|
key: train_recall
|
|
value: [0.65116279 0.65116279 0.59302326 0.60465116 0.63953488 0.61627907
|
|
0.64367816 0.59770115 0.5862069 0.59770115]
|
|
|
|
mean value: 0.6181101309810211
|
|
|
|
key: test_roc_auc
|
|
value: [0.73305085 0.68305085 0.64152542 0.75 0.76610169 0.56610169
|
|
0.7693032 0.71374765 0.72222222 0.72222222]
|
|
|
|
mean value: 0.7067325800376648
|
|
|
|
key: train_roc_auc
|
|
value: [0.81899006 0.8208733 0.79086191 0.79761748 0.8131761 0.80437306
|
|
0.81524774 0.79320086 0.78651211 0.79320086]
|
|
|
|
mean value: 0.8034053479763863
|
|
|
|
key: test_jcc
|
|
value: [0.41666667 0.33333333 0.27272727 0.5 0.42857143 0.14285714
|
|
0.5 0.4 0.44444444 0.44444444]
|
|
|
|
mean value: 0.3883044733044733
|
|
|
|
key: train_jcc
|
|
value: [0.60215054 0.61538462 0.55434783 0.57142857 0.59139785 0.58888889
|
|
0.59574468 0.55913978 0.54255319 0.55913978]
|
|
|
|
mean value: 0.5780175731118705
|
|
|
|
MCC on Blind test: 0.74
|
|
|
|
Accuracy on Blind test: 0.94
|
|
|
|
Model_name: Ridge ClassifierCV
|
|
Model func: RidgeClassifierCV(cv=10)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifierCV(cv=10))])
|
|
|
|
key: fit_time
|
|
value: [0.31517696 0.39699268 0.40492535 0.45975351 0.52393436 0.49117422
|
|
0.40167809 0.46376204 0.4159534 0.51096559]
|
|
|
|
mean value: 0.43843162059783936
|
|
|
|
key: score_time
|
|
value: [0.01886177 0.01974273 0.0200808 0.01899362 0.03186607 0.01899004
|
|
0.01902366 0.01937413 0.0190258 0.02331686]
|
|
|
|
mean value: 0.0209275484085083
|
|
|
|
key: test_mcc
|
|
value: [0.54345279 0.45738492 0.42638684 0.67892378 0.53220339 0.16516678
|
|
0.64349815 0.33873645 0.64009548 0.64009548]
|
|
|
|
mean value: 0.5065944060160014
|
|
|
|
key: train_mcc
|
|
value: [0.72975734 0.74465952 0.69579594 0.71195653 0.72156052 0.72811713
|
|
0.7247231 0.54908765 0.68339509 0.69934924]
|
|
|
|
mean value: 0.6988402070775329
|
|
|
|
key: test_accuracy
|
|
value: [0.89855072 0.88405797 0.88405797 0.92753623 0.88405797 0.82608696
|
|
0.92647059 0.88235294 0.92647059 0.92647059]
|
|
|
|
mean value: 0.896611253196931
|
|
|
|
key: train_accuracy
|
|
value: [0.94003241 0.94327391 0.93354943 0.93679092 0.93841167 0.94003241
|
|
0.93851133 0.90614887 0.93042071 0.93365696]
|
|
|
|
mean value: 0.9340828625828075
|
|
|
|
key: test_fscore
|
|
value: [0.58823529 0.5 0.42857143 0.66666667 0.6 0.25
|
|
0.66666667 0.33333333 0.61538462 0.61538462]
|
|
|
|
mean value: 0.5264242620124973
|
|
|
|
key: train_fscore
|
|
value: /home/tanu/git/LSHTM_analysis/scripts/ml/./embb_cd_8020.py:115: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
baseline_CT.sort_values(by = ['test_mcc'], ascending = False, inplace = True)
|
|
/home/tanu/git/LSHTM_analysis/scripts/ml/./embb_cd_8020.py:118: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
baseline_BT.sort_values(by = ['bts_mcc'], ascending = False, inplace = True)
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[0.75167785 0.76190476 0.71328671 0.72727273 0.74324324 0.74125874
|
|
0.74666667 0.56060606 0.70344828 0.71724138]
|
|
|
|
mean value: 0.7166606421760321
|
|
|
|
key: test_precision
|
|
value: [0.71428571 0.66666667 0.75 1. 0.6 0.33333333
|
|
0.83333333 0.66666667 1. 1. ]
|
|
|
|
mean value: 0.7564285714285715
|
|
|
|
key: train_precision
|
|
value: [0.88888889 0.91803279 0.89473684 0.9122807 0.88709677 0.92982456
|
|
0.88888889 0.82222222 0.87931034 0.89655172]
|
|
|
|
mean value: 0.8917833735307469
|
|
|
|
key: test_recall
|
|
value: [0.5 0.4 0.3 0.5 0.6 0.2
|
|
0.55555556 0.22222222 0.44444444 0.44444444]
|
|
|
|
mean value: 0.4166666666666667
|
|
|
|
key: train_recall
|
|
value: [0.65116279 0.65116279 0.59302326 0.60465116 0.63953488 0.61627907
|
|
0.64367816 0.42528736 0.5862069 0.59770115]
|
|
|
|
mean value: 0.6008687516706763
|
|
|
|
key: test_roc_auc
|
|
value: [0.73305085 0.68305085 0.64152542 0.75 0.76610169 0.56610169
|
|
0.7693032 0.60263653 0.72222222 0.72222222]
|
|
|
|
mean value: 0.6956214689265536
|
|
|
|
key: train_roc_auc
|
|
value: [0.81899006 0.8208733 0.79086191 0.79761748 0.8131761 0.80437306
|
|
0.81524774 0.70511072 0.78651211 0.79320086]
|
|
|
|
mean value: 0.7945963344040764
|
|
|
|
key: test_jcc
|
|
value: [0.41666667 0.33333333 0.27272727 0.5 0.42857143 0.14285714
|
|
0.5 0.2 0.44444444 0.44444444]
|
|
|
|
mean value: 0.3683044733044733
|
|
|
|
key: train_jcc
|
|
value: [0.60215054 0.61538462 0.55434783 0.57142857 0.59139785 0.58888889
|
|
0.59574468 0.38947368 0.54255319 0.55913978]
|
|
|
|
mean value: 0.5610509630382995
|
|
|
|
MCC on Blind test: 0.74
|
|
|
|
Accuracy on Blind test: 0.94
|
|
|
|
Model_name: Logistic Regression
|
|
Model func: LogisticRegression(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegression(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.04470754 0.04497457 0.04462433 0.04594183 0.04550147 0.09245443
|
|
0.11428571 0.08033395 0.0793097 0.04500794]
|
|
|
|
mean value: 0.06371414661407471
|
|
|
|
key: score_time
|
|
value: [0.01247597 0.01236963 0.01238894 0.01236629 0.01535106 0.01283407
|
|
0.01882362 0.02166867 0.01562357 0.01540947]
|
|
|
|
mean value: 0.014931130409240722
|
|
|
|
key: test_mcc
|
|
value: [0.78520705 0.80076161 0.84757938 0.93220339 0.71692818 0.78247589
|
|
0.84757938 0.78520705 0.83483657 0.84757938]
|
|
|
|
mean value: 0.8180357889031028
|
|
|
|
key: train_mcc
|
|
value: [0.86319885 0.87890117 0.86381274 0.86364072 0.87997843 0.87450713
|
|
0.86333379 0.87479305 0.87102239 0.85806759]
|
|
|
|
mean value: 0.8691255870298835
|
|
|
|
key: test_accuracy
|
|
value: [0.88983051 0.89830508 0.92372881 0.96610169 0.8559322 0.88983051
|
|
0.92372881 0.88983051 0.91525424 0.92372881]
|
|
|
|
mean value: 0.9076271186440678
|
|
|
|
key: train_accuracy
|
|
value: [0.93126177 0.93879473 0.93126177 0.93126177 0.93973635 0.93691149
|
|
0.93126177 0.93691149 0.93502825 0.92843691]
|
|
|
|
mean value: 0.9340866290018832
|
|
|
|
key: test_fscore
|
|
value: [0.896 0.90322581 0.92436975 0.96610169 0.864 0.89430894
|
|
0.92436975 0.896 0.91935484 0.92436975]
|
|
|
|
mean value: 0.9112100526863455
|
|
|
|
key: train_fscore
|
|
value: [0.93259464 0.94042163 0.93308891 0.93296602 0.94074074 0.93813481
|
|
0.93271889 0.93836247 0.93652254 0.93027523]
|
|
|
|
mean value: 0.9355825888587944
|
|
|
|
key: test_precision
|
|
value: [0.84848485 0.86153846 0.91666667 0.96610169 0.81818182 0.859375
|
|
0.91666667 0.84848485 0.87692308 0.91666667]
|
|
|
|
mean value: 0.8829089748528308
|
|
|
|
key: train_precision
|
|
value: [0.91485507 0.91607143 0.90892857 0.91039427 0.92531876 0.92028986
|
|
0.9133574 0.91726619 0.91546763 0.90697674]
|
|
|
|
mean value: 0.914892591201125
|
|
|
|
key: test_recall
|
|
value: [0.94915254 0.94915254 0.93220339 0.96610169 0.91525424 0.93220339
|
|
0.93220339 0.94915254 0.96610169 0.93220339]
|
|
|
|
mean value: 0.9423728813559322
|
|
|
|
key: train_recall
|
|
value: [0.95103578 0.96610169 0.95856874 0.9566855 0.9566855 0.9566855
|
|
0.95291902 0.96045198 0.95856874 0.95480226]
|
|
|
|
mean value: 0.9572504708097929
|
|
|
|
key: test_roc_auc
|
|
value: [0.88983051 0.89830508 0.92372881 0.96610169 0.8559322 0.88983051
|
|
0.92372881 0.88983051 0.91525424 0.92372881]
|
|
|
|
mean value: 0.9076271186440678
|
|
|
|
key: train_roc_auc
|
|
value: [0.93126177 0.93879473 0.93126177 0.93126177 0.93973635 0.93691149
|
|
0.93126177 0.93691149 0.93502825 0.92843691]
|
|
|
|
mean value: 0.9340866290018832
|
|
|
|
key: test_jcc
|
|
value: [0.8115942 0.82352941 0.859375 0.93442623 0.76056338 0.80882353
|
|
0.859375 0.8115942 0.85074627 0.859375 ]
|
|
|
|
mean value: 0.8379402225420175
|
|
|
|
key: train_jcc
|
|
value: [0.87370242 0.88754325 0.87457045 0.87435456 0.88811189 0.88347826
|
|
0.87392055 0.88388215 0.88062284 0.86963979]
|
|
|
|
mean value: 0.8789826164821043
|
|
|
|
MCC on Blind test: 0.71
|
|
|
|
Accuracy on Blind test: 0.92
|
|
|
|
Model_name: Logistic RegressionCV
|
|
Model func: LogisticRegressionCV(random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegressionCV(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.18941402 1.21630669 1.22050476 1.57758641 1.0369873 1.26718163
|
|
1.07845354 1.3389585 1.27597809 1.53487372]
|
|
|
|
mean value: 1.2736244678497315
|
|
|
|
key: score_time
|
|
value: [0.01518226 0.01569629 0.01648211 0.01600289 0.01586604 0.01583838
|
|
0.01574588 0.0157814 0.01580811 0.01682377]
|
|
|
|
mean value: 0.01592271327972412
|
|
|
|
key: test_mcc
|
|
value: [0.86640023 0.83098605 0.86440678 0.91855865 0.78247589 0.78247589
|
|
0.86490385 0.86640023 0.84757938 0.91538573]
|
|
|
|
mean value: 0.8539572677305944
|
|
|
|
key: train_mcc
|
|
value: [0.91910058 0.9285423 0.91915275 0.9303614 0.96610855 0.93236874
|
|
0.92483445 0.91906146 0.92472947 0.93222984]
|
|
|
|
mean value: 0.9296489531763065
|
|
|
|
key: test_accuracy
|
|
value: [0.93220339 0.91525424 0.93220339 0.95762712 0.88983051 0.88983051
|
|
0.93220339 0.93220339 0.92372881 0.95762712]
|
|
|
|
mean value: 0.926271186440678
|
|
|
|
key: train_accuracy
|
|
value: [0.95951036 0.96421846 0.95951036 0.96516008 0.98305085 0.96610169
|
|
0.96233522 0.95951036 0.96233522 0.96610169]
|
|
|
|
mean value: 0.9647834274952919
|
|
|
|
key: test_fscore
|
|
value: [0.93442623 0.91666667 0.93220339 0.95934959 0.89430894 0.89430894
|
|
0.93103448 0.93442623 0.92436975 0.95798319]
|
|
|
|
mean value: 0.9279077419123456
|
|
|
|
key: train_fscore
|
|
value: [0.95977549 0.96448598 0.95985061 0.96532334 0.98301887 0.96641791
|
|
0.96268657 0.95970009 0.96254682 0.96622889]
|
|
|
|
mean value: 0.9650034564583131
|
|
|
|
key: test_precision
|
|
value: [0.9047619 0.90163934 0.93220339 0.921875 0.859375 0.859375
|
|
0.94736842 0.9047619 0.91666667 0.95 ]
|
|
|
|
mean value: 0.9098026631335911
|
|
|
|
key: train_precision
|
|
value: [0.9535316 0.95732839 0.95185185 0.9608209 0.98487713 0.95748614
|
|
0.95378928 0.95522388 0.95716946 0.96261682]
|
|
|
|
mean value: 0.9594695437327295
|
|
|
|
key: test_recall
|
|
value: [0.96610169 0.93220339 0.93220339 1. 0.93220339 0.93220339
|
|
0.91525424 0.96610169 0.93220339 0.96610169]
|
|
|
|
mean value: 0.947457627118644
|
|
|
|
key: train_recall
|
|
value: [0.96610169 0.97175141 0.96798493 0.96986817 0.98116761 0.97551789
|
|
0.97175141 0.96421846 0.96798493 0.96986817]
|
|
|
|
mean value: 0.9706214689265537
|
|
|
|
key: test_roc_auc
|
|
value: [0.93220339 0.91525424 0.93220339 0.95762712 0.88983051 0.88983051
|
|
0.93220339 0.93220339 0.92372881 0.95762712]
|
|
|
|
mean value: 0.926271186440678
|
|
|
|
key: train_roc_auc
|
|
value: [0.95951036 0.96421846 0.95951036 0.96516008 0.98305085 0.96610169
|
|
0.96233522 0.95951036 0.96233522 0.96610169]
|
|
|
|
mean value: 0.9647834274952919
|
|
|
|
key: test_jcc
|
|
value: [0.87692308 0.84615385 0.87301587 0.921875 0.80882353 0.80882353
|
|
0.87096774 0.87692308 0.859375 0.91935484]
|
|
|
|
mean value: 0.8662235512484564
|
|
|
|
key: train_jcc
|
|
value: [0.92266187 0.93140794 0.92280072 0.93297101 0.96660482 0.93501805
|
|
0.92805755 0.92252252 0.92779783 0.93466425]
|
|
|
|
mean value: 0.9324506576895001
|
|
|
|
MCC on Blind test: 0.72
|
|
|
|
Accuracy on Blind test: 0.93
|
|
|
|
Model_name: Gaussian NB
|
|
Model func: GaussianNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01914287 0.01303315 0.012537 0.02088332 0.01327682 0.01260662
|
|
0.01408529 0.01505947 0.01520324 0.01581883]
|
|
|
|
mean value: 0.015164661407470702
|
|
|
|
key: score_time
|
|
value: [0.01280713 0.00985122 0.01225281 0.01444554 0.01036263 0.00944304
|
|
0.01089573 0.0101583 0.00990224 0.00958657]
|
|
|
|
mean value: 0.010970520973205566
|
|
|
|
key: test_mcc
|
|
value: [0.59875004 0.73049431 0.66254135 0.76668665 0.5770176 0.47628967
|
|
0.72881356 0.66139706 0.73261169 0.54623099]
|
|
|
|
mean value: 0.6480832917932536
|
|
|
|
key: train_mcc
|
|
value: [0.67422951 0.68226634 0.67057705 0.68033208 0.68980519 0.68066578
|
|
0.672703 0.69952078 0.6874018 0.69700741]
|
|
|
|
mean value: 0.68345089379373
|
|
|
|
key: test_accuracy
|
|
value: [0.79661017 0.86440678 0.83050847 0.88135593 0.78813559 0.73728814
|
|
0.86440678 0.83050847 0.86440678 0.77118644]
|
|
|
|
mean value: 0.8228813559322035
|
|
|
|
key: train_accuracy
|
|
value: [0.83709981 0.84086629 0.83521657 0.83992467 0.84463277 0.83992467
|
|
0.83615819 0.84934087 0.84369115 0.84839925]
|
|
|
|
mean value: 0.8415254237288136
|
|
|
|
key: test_fscore
|
|
value: [0.80952381 0.86885246 0.83606557 0.88709677 0.7826087 0.72566372
|
|
0.86440678 0.82758621 0.85714286 0.784 ]
|
|
|
|
mean value: 0.8242946872671002
|
|
|
|
key: train_fscore
|
|
value: [0.83786317 0.84395199 0.83690587 0.84288355 0.84764543 0.84375
|
|
0.83888889 0.85294118 0.84427767 0.85023256]
|
|
|
|
mean value: 0.8439340299766152
|
|
|
|
key: test_precision
|
|
value: [0.76119403 0.84126984 0.80952381 0.84615385 0.80357143 0.75925926
|
|
0.86440678 0.84210526 0.90566038 0.74242424]
|
|
|
|
mean value: 0.8175568877230576
|
|
|
|
key: train_precision
|
|
value: [0.83395522 0.82789855 0.82841328 0.82758621 0.83152174 0.82405745
|
|
0.82513661 0.83303411 0.8411215 0.84007353]
|
|
|
|
mean value: 0.8312798203464746
|
|
|
|
key: test_recall
|
|
value: [0.86440678 0.89830508 0.86440678 0.93220339 0.76271186 0.69491525
|
|
0.86440678 0.81355932 0.81355932 0.83050847]
|
|
|
|
mean value: 0.8338983050847458
|
|
|
|
key: train_recall
|
|
value: [0.84180791 0.8606403 0.84557439 0.85875706 0.86440678 0.86440678
|
|
0.85310734 0.87382298 0.84745763 0.8606403 ]
|
|
|
|
mean value: 0.8570621468926554
|
|
|
|
key: test_roc_auc
|
|
value: [0.79661017 0.86440678 0.83050847 0.88135593 0.78813559 0.73728814
|
|
0.86440678 0.83050847 0.86440678 0.77118644]
|
|
|
|
mean value: 0.8228813559322034
|
|
|
|
key: train_roc_auc
|
|
value: [0.83709981 0.84086629 0.83521657 0.83992467 0.84463277 0.83992467
|
|
0.83615819 0.84934087 0.84369115 0.84839925]
|
|
|
|
mean value: 0.8415254237288136
|
|
|
|
key: test_jcc
|
|
value: [0.68 0.76811594 0.71830986 0.79710145 0.64285714 0.56944444
|
|
0.76119403 0.70588235 0.75 0.64473684]
|
|
|
|
mean value: 0.703764206265805
|
|
|
|
key: train_jcc
|
|
value: [0.72096774 0.73003195 0.71955128 0.7284345 0.73557692 0.72972973
|
|
0.72248804 0.74358974 0.73051948 0.7394822 ]
|
|
|
|
mean value: 0.7300371593501526
|
|
|
|
MCC on Blind test: 0.44
|
|
|
|
Accuracy on Blind test: 0.82
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01547909 0.02258873 0.01792765 0.0175662 0.01782608 0.01765227
|
|
0.01754951 0.02099109 0.0175519 0.01776004]
|
|
|
|
mean value: 0.01828925609588623
|
|
|
|
key: score_time
|
|
value: [0.01229954 0.01270795 0.01278043 0.01270127 0.01276445 0.01268935
|
|
0.01278639 0.01266026 0.01265526 0.0126605 ]
|
|
|
|
mean value: 0.012670540809631347
|
|
|
|
key: test_mcc
|
|
value: [0.56223688 0.71196668 0.66446176 0.69006556 0.54623099 0.63632522
|
|
0.59356147 0.7013929 0.74672866 0.6486493 ]
|
|
|
|
mean value: 0.6501619424887987
|
|
|
|
key: train_mcc
|
|
value: [0.67685622 0.66195814 0.67264225 0.66042483 0.68502151 0.67085699
|
|
0.66907359 0.65462711 0.66446176 0.67442937]
|
|
|
|
mean value: 0.6690351773463729
|
|
|
|
key: test_accuracy
|
|
value: [0.77966102 0.8559322 0.83050847 0.83898305 0.77118644 0.81355932
|
|
0.79661017 0.84745763 0.87288136 0.8220339 ]
|
|
|
|
mean value: 0.8228813559322035
|
|
|
|
key: train_accuracy
|
|
value: [0.83709981 0.82956685 0.83521657 0.82862524 0.84180791 0.83427495
|
|
0.83333333 0.82580038 0.83050847 0.83615819]
|
|
|
|
mean value: 0.8332391713747646
|
|
|
|
key: test_fscore
|
|
value: [0.79032258 0.85470085 0.83870968 0.85271318 0.784 0.828125
|
|
0.8 0.85714286 0.86956522 0.832 ]
|
|
|
|
mean value: 0.8307279365594106
|
|
|
|
key: train_fscore
|
|
value: [0.84400361 0.83708371 0.84162896 0.83662478 0.84671533 0.84086799
|
|
0.8401084 0.83378257 0.83870968 0.8423913 ]
|
|
|
|
mean value: 0.8401916323800057
|
|
|
|
key: test_precision
|
|
value: [0.75384615 0.86206897 0.8 0.78571429 0.74242424 0.76811594
|
|
0.78688525 0.80597015 0.89285714 0.78787879]
|
|
|
|
mean value: 0.798576091542221
|
|
|
|
key: train_precision
|
|
value: [0.80968858 0.80172414 0.81010453 0.79931389 0.82123894 0.80869565
|
|
0.80729167 0.79725086 0.8 0.81151832]
|
|
|
|
mean value: 0.8066826583123691
|
|
|
|
key: test_recall
|
|
value: [0.83050847 0.84745763 0.88135593 0.93220339 0.83050847 0.89830508
|
|
0.81355932 0.91525424 0.84745763 0.88135593]
|
|
|
|
mean value: 0.8677966101694915
|
|
|
|
key: train_recall
|
|
value: [0.88135593 0.87570621 0.87570621 0.87758945 0.87382298 0.87570621
|
|
0.87570621 0.87382298 0.88135593 0.87570621]
|
|
|
|
mean value: 0.876647834274953
|
|
|
|
key: test_roc_auc
|
|
value: [0.77966102 0.8559322 0.83050847 0.83898305 0.77118644 0.81355932
|
|
0.79661017 0.84745763 0.87288136 0.8220339 ]
|
|
|
|
mean value: 0.8228813559322035
|
|
|
|
key: train_roc_auc
|
|
value: [0.83709981 0.82956685 0.83521657 0.82862524 0.84180791 0.83427495
|
|
0.83333333 0.82580038 0.83050847 0.83615819]
|
|
|
|
mean value: 0.8332391713747647
|
|
|
|
key: test_jcc
|
|
value: [0.65333333 0.74626866 0.72222222 0.74324324 0.64473684 0.70666667
|
|
0.66666667 0.75 0.76923077 0.71232877]
|
|
|
|
mean value: 0.711469716730787
|
|
|
|
key: train_jcc
|
|
value: [0.7301092 0.71981424 0.7265625 0.7191358 0.73417722 0.72542902
|
|
0.72429907 0.71494607 0.72222222 0.72769953]
|
|
|
|
mean value: 0.7244394869711428
|
|
|
|
MCC on Blind test: 0.25
|
|
|
|
Accuracy on Blind test: 0.74
|
|
|
|
Model_name: K-Nearest Neighbors
|
|
Model func: KNeighborsClassifier()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', KNeighborsClassifier())])
|
|
|
|
key: fit_time
|
|
value: [0.01644087 0.0190351 0.01284313 0.01196957 0.01290107 0.01443887
|
|
0.01914406 0.01253033 0.01699519 0.01464415]
|
|
|
|
mean value: 0.015094232559204102
|
|
|
|
key: score_time
|
|
value: [0.03935957 0.02268672 0.01549888 0.02082419 0.02131462 0.02241468
|
|
0.02255774 0.02348351 0.02258015 0.02211094]
|
|
|
|
mean value: 0.023283100128173827
|
|
|
|
key: test_mcc
|
|
value: [0.79930525 0.80076161 0.78247589 0.81649658 0.66254135 0.7484552
|
|
0.76668665 0.8136762 0.74672866 0.68041382]
|
|
|
|
mean value: 0.7617541211839434
|
|
|
|
key: train_mcc
|
|
value: [0.83904784 0.83441985 0.83287493 0.83086927 0.83400383 0.84042935
|
|
0.82886715 0.83311864 0.84323429 0.84243966]
|
|
|
|
mean value: 0.8359304807927322
|
|
|
|
key: test_accuracy
|
|
value: [0.88983051 0.89830508 0.88983051 0.90677966 0.83050847 0.87288136
|
|
0.88135593 0.90677966 0.87288136 0.83898305]
|
|
|
|
mean value: 0.8788135593220339
|
|
|
|
key: train_accuracy
|
|
value: [0.91902072 0.91619586 0.91525424 0.91431262 0.91619586 0.91902072
|
|
0.913371 0.91525424 0.91996234 0.91996234]
|
|
|
|
mean value: 0.9168549905838042
|
|
|
|
key: test_fscore
|
|
value: [0.90076336 0.90322581 0.89430894 0.91056911 0.83606557 0.87804878
|
|
0.88709677 0.90756303 0.87603306 0.84552846]
|
|
|
|
mean value: 0.8839202880808448
|
|
|
|
key: train_fscore
|
|
value: [0.92095588 0.91901729 0.91833031 0.91734787 0.91872146 0.92196007
|
|
0.91636364 0.91847826 0.92335437 0.92293744]
|
|
|
|
mean value: 0.9197466592565016
|
|
|
|
key: test_precision
|
|
value: [0.81944444 0.86153846 0.859375 0.875 0.80952381 0.84375
|
|
0.84615385 0.9 0.85483871 0.8125 ]
|
|
|
|
mean value: 0.8482124271337981
|
|
|
|
key: train_precision
|
|
value: [0.8994614 0.88908451 0.88616462 0.88596491 0.89184397 0.88966725
|
|
0.8857645 0.88481675 0.88581315 0.88986014]
|
|
|
|
mean value: 0.888844120691569
|
|
|
|
key: test_recall
|
|
value: [1. 0.94915254 0.93220339 0.94915254 0.86440678 0.91525424
|
|
0.93220339 0.91525424 0.89830508 0.88135593]
|
|
|
|
mean value: 0.923728813559322
|
|
|
|
key: train_recall
|
|
value: [0.94350282 0.95103578 0.95291902 0.95103578 0.9472693 0.9566855
|
|
0.94915254 0.95480226 0.96421846 0.95856874]
|
|
|
|
mean value: 0.9529190207156308
|
|
|
|
key: test_roc_auc
|
|
value: [0.88983051 0.89830508 0.88983051 0.90677966 0.83050847 0.87288136
|
|
0.88135593 0.90677966 0.87288136 0.83898305]
|
|
|
|
mean value: 0.8788135593220339
|
|
|
|
key: train_roc_auc
|
|
value: [0.91902072 0.91619586 0.91525424 0.91431262 0.91619586 0.91902072
|
|
0.913371 0.91525424 0.91996234 0.91996234]
|
|
|
|
mean value: 0.9168549905838042
|
|
|
|
key: test_jcc
|
|
value: [0.81944444 0.82352941 0.80882353 0.8358209 0.71830986 0.7826087
|
|
0.79710145 0.83076923 0.77941176 0.73239437]
|
|
|
|
mean value: 0.7928213646898066
|
|
|
|
key: train_jcc
|
|
value: [0.85349233 0.85016835 0.84899329 0.84731544 0.84966216 0.85521886
|
|
0.84563758 0.84924623 0.85762144 0.85690236]
|
|
|
|
mean value: 0.8514258038769542
|
|
|
|
MCC on Blind test: 0.32
|
|
|
|
Accuracy on Blind test: 0.83
|
|
|
|
Model_name: SVM
|
|
Model func: SVC(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SVC(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.06659913 0.06360626 0.06409431 0.0665195 0.06299567 0.05471039
|
|
0.07333541 0.0797708 0.07082963 0.07755327]
|
|
|
|
mean value: 0.06800143718719483
|
|
|
|
key: score_time
|
|
value: [0.02273107 0.02201724 0.01989532 0.02234793 0.02758884 0.02031136
|
|
0.03336978 0.02327919 0.03287697 0.02066684]
|
|
|
|
mean value: 0.024508452415466307
|
|
|
|
key: test_mcc
|
|
value: [0.84270097 0.83242375 0.80403577 0.81649658 0.68599434 0.74586985
|
|
0.80076161 0.71692818 0.81461308 0.80076161]
|
|
|
|
mean value: 0.7860585741944655
|
|
|
|
key: train_mcc
|
|
value: [0.83086927 0.84269225 0.83891336 0.82986174 0.85530027 0.84042935
|
|
0.82379781 0.84120382 0.84471289 0.83014064]
|
|
|
|
mean value: 0.8377921401405636
|
|
|
|
key: test_accuracy
|
|
value: [0.91525424 0.91525424 0.89830508 0.90677966 0.83898305 0.87288136
|
|
0.89830508 0.8559322 0.90677966 0.89830508]
|
|
|
|
mean value: 0.8906779661016949
|
|
|
|
key: train_accuracy
|
|
value: [0.91431262 0.91996234 0.9180791 0.913371 0.92655367 0.91902072
|
|
0.91054614 0.91902072 0.92090395 0.913371 ]
|
|
|
|
mean value: 0.9175141242937853
|
|
|
|
key: test_fscore
|
|
value: [0.921875 0.91803279 0.9047619 0.91056911 0.8503937 0.87394958
|
|
0.90322581 0.864 0.90909091 0.90322581]
|
|
|
|
mean value: 0.8959124599951677
|
|
|
|
key: train_fscore
|
|
value: [0.91734787 0.92307692 0.92126697 0.91696751 0.92909091 0.92196007
|
|
0.91402715 0.92238267 0.92405063 0.91711712]
|
|
|
|
mean value: 0.9207287818520845
|
|
|
|
key: test_precision
|
|
value: [0.85507246 0.88888889 0.85074627 0.875 0.79411765 0.86666667
|
|
0.86153846 0.81818182 0.88709677 0.86153846]
|
|
|
|
mean value: 0.8558847450491501
|
|
|
|
key: train_precision
|
|
value: [0.88596491 0.88850174 0.88675958 0.88041594 0.89806678 0.88966725
|
|
0.87979094 0.88561525 0.88869565 0.8791019 ]
|
|
|
|
mean value: 0.8862579959199931
|
|
|
|
key: test_recall
|
|
value: [1. 0.94915254 0.96610169 0.94915254 0.91525424 0.88135593
|
|
0.94915254 0.91525424 0.93220339 0.94915254]
|
|
|
|
mean value: 0.940677966101695
|
|
|
|
key: train_recall
|
|
value: [0.95103578 0.96045198 0.95856874 0.9566855 0.96233522 0.9566855
|
|
0.95103578 0.96233522 0.96233522 0.95856874]
|
|
|
|
mean value: 0.9580037664783427
|
|
|
|
key: test_roc_auc
|
|
value: [0.91525424 0.91525424 0.89830508 0.90677966 0.83898305 0.87288136
|
|
0.89830508 0.8559322 0.90677966 0.89830508]
|
|
|
|
mean value: 0.8906779661016949
|
|
|
|
key: train_roc_auc
|
|
value: [0.91431262 0.91996234 0.9180791 0.913371 0.92655367 0.91902072
|
|
0.91054614 0.91902072 0.92090395 0.913371 ]
|
|
|
|
mean value: 0.9175141242937853
|
|
|
|
key: test_jcc
|
|
value: [0.85507246 0.84848485 0.82608696 0.8358209 0.73972603 0.7761194
|
|
0.82352941 0.76056338 0.83333333 0.82352941]
|
|
|
|
mean value: 0.8122266131823862
|
|
|
|
key: train_jcc
|
|
value: [0.84731544 0.85714286 0.85402685 0.84666667 0.86757216 0.85521886
|
|
0.84166667 0.8559464 0.85882353 0.8469218 ]
|
|
|
|
mean value: 0.8531301208847907
|
|
|
|
MCC on Blind test: 0.63
|
|
|
|
Accuracy on Blind test: 0.9
|
|
|
|
Model_name: MLP
|
|
Model func: MLPClassifier(max_iter=500, random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MLPClassifier(max_iter=500, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [3.52196574 5.79859757 5.48698139 3.49088597 5.46073318 6.96858454
|
|
4.37182379 4.82395959 5.74664688 3.34225059]
|
|
|
|
mean value: 4.901242923736572
|
|
|
|
key: score_time
|
|
value: [0.01337767 0.01569033 0.01523805 0.01356792 0.02483082 0.01436281
|
|
0.02122784 0.01479125 0.02194929 0.01308107]
|
|
|
|
mean value: 0.016811704635620116
|
|
|
|
key: test_mcc
|
|
value: [0.86490385 0.83483657 0.91538573 0.91643971 0.76447079 0.83050847
|
|
0.89830508 0.86440678 0.84757938 0.89882165]
|
|
|
|
mean value: 0.8635658023874291
|
|
|
|
key: train_mcc
|
|
value: [0.94016324 0.98494107 0.98496203 0.96258105 0.99058556 0.99435205
|
|
0.9717945 0.98872862 0.99623352 0.98116761]
|
|
|
|
mean value: 0.9795509246731521
|
|
|
|
key: test_accuracy
|
|
value: [0.93220339 0.91525424 0.95762712 0.95762712 0.88135593 0.91525424
|
|
0.94915254 0.93220339 0.92372881 0.94915254]
|
|
|
|
mean value: 0.9313559322033899
|
|
|
|
key: train_accuracy
|
|
value: [0.96986817 0.99246704 0.99246704 0.98116761 0.9952919 0.99717514
|
|
0.98587571 0.99435028 0.99811676 0.9905838 ]
|
|
|
|
mean value: 0.9897363465160075
|
|
|
|
key: test_fscore
|
|
value: [0.93103448 0.91935484 0.95726496 0.95867769 0.8852459 0.91525424
|
|
0.94915254 0.93220339 0.92307692 0.94827586]
|
|
|
|
mean value: 0.9319540820960427
|
|
|
|
key: train_fscore
|
|
value: [0.96940727 0.9924812 0.99249531 0.98137803 0.99529633 0.99717248
|
|
0.98580889 0.99437148 0.99811676 0.9905838 ]
|
|
|
|
mean value: 0.9897111554594793
|
|
|
|
key: test_precision
|
|
value: [0.94736842 0.87692308 0.96551724 0.93548387 0.85714286 0.91525424
|
|
0.94915254 0.93220339 0.93103448 0.96491228]
|
|
|
|
mean value: 0.9274992400417519
|
|
|
|
key: train_precision
|
|
value: [0.98446602 0.99061914 0.98878505 0.97053407 0.9943609 0.99811321
|
|
0.9904943 0.99065421 0.99811676 0.9905838 ]
|
|
|
|
mean value: 0.9896727450048616
|
|
|
|
key: test_recall
|
|
value: [0.91525424 0.96610169 0.94915254 0.98305085 0.91525424 0.91525424
|
|
0.94915254 0.93220339 0.91525424 0.93220339]
|
|
|
|
mean value: 0.9372881355932203
|
|
|
|
key: train_recall
|
|
value: [0.95480226 0.99435028 0.99623352 0.99246704 0.99623352 0.99623352
|
|
0.98116761 0.99811676 0.99811676 0.9905838 ]
|
|
|
|
mean value: 0.9898305084745762
|
|
|
|
key: test_roc_auc
|
|
value: [0.93220339 0.91525424 0.95762712 0.95762712 0.88135593 0.91525424
|
|
0.94915254 0.93220339 0.92372881 0.94915254]
|
|
|
|
mean value: 0.9313559322033899
|
|
|
|
key: train_roc_auc
|
|
value: [0.96986817 0.99246704 0.99246704 0.98116761 0.9952919 0.99717514
|
|
0.98587571 0.99435028 0.99811676 0.9905838 ]
|
|
|
|
mean value: 0.9897363465160075
|
|
|
|
key: test_jcc
|
|
value: [0.87096774 0.85074627 0.91803279 0.92063492 0.79411765 0.84375
|
|
0.90322581 0.87301587 0.85714286 0.90163934]
|
|
|
|
mean value: 0.8733273246043829
|
|
|
|
key: train_jcc
|
|
value: [0.9406308 0.98507463 0.98510242 0.96343693 0.9906367 0.9943609
|
|
0.97201493 0.98880597 0.9962406 0.98134328]
|
|
|
|
mean value: 0.9797647161181674
|
|
|
|
MCC on Blind test: 0.73
|
|
|
|
Accuracy on Blind test: 0.94
|
|
|
|
Model_name: Decision Tree
|
|
Model func: DecisionTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', DecisionTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.13169575 0.12222648 0.11204457 0.07342315 0.09652853 0.11117435
|
|
0.09565616 0.11270905 0.11162901 0.11713266]
|
|
|
|
mean value: 0.10842196941375733
|
|
|
|
key: score_time
|
|
value: [0.0101223 0.00982261 0.01490307 0.00918174 0.00963759 0.0138104
|
|
0.01455998 0.00940537 0.00959563 0.01408815]
|
|
|
|
mean value: 0.011512684822082519
|
|
|
|
key: test_mcc
|
|
value: [0.91538573 0.81461308 0.89830508 0.88148255 0.76447079 0.78520705
|
|
0.86490385 0.81461308 0.86490385 0.84757938]
|
|
|
|
mean value: 0.8451464451008713
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.95762712 0.90677966 0.94915254 0.94067797 0.88135593 0.88983051
|
|
0.93220339 0.90677966 0.93220339 0.92372881]
|
|
|
|
mean value: 0.9220338983050848
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.95798319 0.90909091 0.94915254 0.94117647 0.8852459 0.896
|
|
0.93103448 0.90909091 0.93333333 0.92307692]
|
|
|
|
mean value: 0.9235184665228467
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.95 0.88709677 0.94915254 0.93333333 0.85714286 0.84848485
|
|
0.94736842 0.88709677 0.91803279 0.93103448]
|
|
|
|
mean value: 0.9108742820417515
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.96610169 0.93220339 0.94915254 0.94915254 0.91525424 0.94915254
|
|
0.91525424 0.93220339 0.94915254 0.91525424]
|
|
|
|
mean value: 0.9372881355932203
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.95762712 0.90677966 0.94915254 0.94067797 0.88135593 0.88983051
|
|
0.93220339 0.90677966 0.93220339 0.92372881]
|
|
|
|
mean value: 0.9220338983050848
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.91935484 0.83333333 0.90322581 0.88888889 0.79411765 0.8115942
|
|
0.87096774 0.83333333 0.875 0.85714286]
|
|
|
|
mean value: 0.8586958649752561
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.69
|
|
|
|
Accuracy on Blind test: 0.92
|
|
|
|
Model_name: Extra Trees
|
|
Model func: ExtraTreesClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreesClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.18448567 0.18298745 0.22127724 0.22443771 0.22705698 0.21281409
|
|
0.23458552 0.21867299 0.1968739 0.19747877]
|
|
|
|
mean value: 0.2100670337677002
|
|
|
|
key: score_time
|
|
value: [0.03019857 0.02489734 0.02019167 0.02073598 0.02320886 0.02062988
|
|
0.02146173 0.02094674 0.02185369 0.02905798]
|
|
|
|
mean value: 0.0233182430267334
|
|
|
|
key: test_mcc
|
|
value: [0.93273945 0.91538573 0.96610169 0.98319208 0.84757938 0.88148255
|
|
0.93220339 0.84855529 0.8824975 0.95038193]
|
|
|
|
mean value: 0.9140118997688764
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.96610169 0.95762712 0.98305085 0.99152542 0.92372881 0.94067797
|
|
0.96610169 0.92372881 0.94067797 0.97457627]
|
|
|
|
mean value: 0.9567796610169491
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.96666667 0.95726496 0.98305085 0.99159664 0.92436975 0.94117647
|
|
0.96610169 0.92173913 0.93913043 0.97391304]
|
|
|
|
mean value: 0.9565009632143014
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.95081967 0.96551724 0.98305085 0.98333333 0.91666667 0.93333333
|
|
0.96610169 0.94642857 0.96428571 1. ]
|
|
|
|
mean value: 0.9609537074930958
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.98305085 0.94915254 0.98305085 1. 0.93220339 0.94915254
|
|
0.96610169 0.89830508 0.91525424 0.94915254]
|
|
|
|
mean value: 0.9525423728813559
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.96610169 0.95762712 0.98305085 0.99152542 0.92372881 0.94067797
|
|
0.96610169 0.92372881 0.94067797 0.97457627]
|
|
|
|
mean value: 0.9567796610169492
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.93548387 0.91803279 0.96666667 0.98333333 0.859375 0.88888889
|
|
0.93442623 0.85483871 0.8852459 0.94915254]
|
|
|
|
mean value: 0.9175443929939718
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.57
|
|
|
|
Accuracy on Blind test: 0.9
|
|
|
|
Model_name: Extra Tree
|
|
Model func: ExtraTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01268601 0.02045417 0.01607585 0.01523185 0.02098131 0.02108884
|
|
0.01327443 0.0126822 0.01312947 0.01394057]
|
|
|
|
mean value: 0.015954470634460448
|
|
|
|
key: score_time
|
|
value: [0.01495385 0.01171041 0.01105165 0.01075077 0.01700163 0.01079249
|
|
0.00932479 0.00920558 0.0091486 0.01306796]
|
|
|
|
mean value: 0.011700773239135742
|
|
|
|
key: test_mcc
|
|
value: [0.71278644 0.77977303 0.86640023 0.85348593 0.79661017 0.73049431
|
|
0.72029406 0.66446176 0.78067087 0.69531486]
|
|
|
|
mean value: 0.7600291653304111
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.8559322 0.88983051 0.93220339 0.92372881 0.89830508 0.86440678
|
|
0.8559322 0.83050847 0.88983051 0.84745763]
|
|
|
|
mean value: 0.8788135593220339
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.85950413 0.8907563 0.93442623 0.928 0.89830508 0.86885246
|
|
0.86614173 0.83870968 0.89256198 0.85 ]
|
|
|
|
mean value: 0.882725760119666
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.83870968 0.88333333 0.9047619 0.87878788 0.89830508 0.84126984
|
|
0.80882353 0.8 0.87096774 0.83606557]
|
|
|
|
mean value: 0.8561024565435816
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.88135593 0.89830508 0.96610169 0.98305085 0.89830508 0.89830508
|
|
0.93220339 0.88135593 0.91525424 0.86440678]
|
|
|
|
mean value: 0.9118644067796611
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.8559322 0.88983051 0.93220339 0.92372881 0.89830508 0.86440678
|
|
0.8559322 0.83050847 0.88983051 0.84745763]
|
|
|
|
mean value: 0.8788135593220339
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.75362319 0.8030303 0.87692308 0.86567164 0.81538462 0.76811594
|
|
0.76388889 0.72222222 0.80597015 0.73913043]
|
|
|
|
mean value: 0.7913960462711274
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.27
|
|
|
|
Accuracy on Blind test: 0.8
|
|
|
|
Model_name: Random Forest
|
|
Model func: RandomForestClassifier(n_estimators=1000, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(n_estimators=1000, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [4.36764503 3.90507293 3.84251595 4.21998048 3.78765988 3.72160077
|
|
4.06675792 3.68579555 3.71161127 3.78875661]
|
|
|
|
mean value: 3.909739637374878
|
|
|
|
key: score_time
|
|
value: [0.1796751 0.1091311 0.10466957 0.1270268 0.10415697 0.10148597
|
|
0.09922409 0.11080122 0.11108327 0.10428238]
|
|
|
|
mean value: 0.11515364646911622
|
|
|
|
key: test_mcc
|
|
value: [0.94928891 0.95038193 0.96610169 0.98319208 0.93435318 0.8824975
|
|
0.93220339 0.94928891 0.93435318 0.98319208]
|
|
|
|
mean value: 0.9464852853904762
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.97457627 0.97457627 0.98305085 0.99152542 0.96610169 0.94067797
|
|
0.96610169 0.97457627 0.96610169 0.99152542]
|
|
|
|
mean value: 0.9728813559322034
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.97478992 0.97391304 0.98305085 0.99145299 0.96721311 0.94214876
|
|
0.96610169 0.97435897 0.96491228 0.99145299]
|
|
|
|
mean value: 0.9729394614868917
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.96666667 1. 0.98305085 1. 0.93650794 0.91935484
|
|
0.96610169 0.98275862 1. 1. ]
|
|
|
|
mean value: 0.9754440604946817
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.98305085 0.94915254 0.98305085 0.98305085 1. 0.96610169
|
|
0.96610169 0.96610169 0.93220339 0.98305085]
|
|
|
|
mean value: 0.9711864406779661
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.97457627 0.97457627 0.98305085 0.99152542 0.96610169 0.94067797
|
|
0.96610169 0.97457627 0.96610169 0.99152542]
|
|
|
|
mean value: 0.9728813559322034
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.95081967 0.94915254 0.96666667 0.98305085 0.93650794 0.890625
|
|
0.93442623 0.95 0.93220339 0.98305085]
|
|
|
|
mean value: 0.9476503131932592
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.57
|
|
|
|
Accuracy on Blind test: 0.9
|
|
|
|
Model_name: Random Forest2
|
|
Model func: RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000...05', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10,
|
|
oob_score=True, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.22308016 1.34552908 1.28087878 1.26344371 1.23413181 1.3040204
|
|
1.30803466 1.23792887 1.30580854 1.40375137]
|
|
|
|
mean value: 1.2906607389450073
|
|
|
|
key: score_time
|
|
value: [0.16982508 0.27282596 0.24609804 0.27564526 0.27050352 0.24541187
|
|
0.21185684 0.14530158 0.16621304 0.23654032]
|
|
|
|
mean value: 0.22402215003967285
|
|
|
|
key: test_mcc
|
|
value: [0.93273945 0.93273945 0.96610169 0.96610169 0.9003767 0.86490385
|
|
0.93273945 0.91538573 0.91643971 0.96665725]
|
|
|
|
mean value: 0.9294184976617113
|
|
|
|
key: train_mcc
|
|
value: [0.96987505 0.98496203 0.98306654 0.97740806 0.98116761 0.97928611
|
|
0.9793 0.97366228 0.98306654 0.97740113]
|
|
|
|
mean value: 0.978919534260023
|
|
|
|
key: test_accuracy
|
|
value: [0.96610169 0.96610169 0.98305085 0.98305085 0.94915254 0.93220339
|
|
0.96610169 0.95762712 0.95762712 0.98305085]
|
|
|
|
mean value: 0.964406779661017
|
|
|
|
key: train_accuracy
|
|
value: [0.98493409 0.99246704 0.99152542 0.98870056 0.9905838 0.98964218
|
|
0.98964218 0.98681733 0.99152542 0.98870056]
|
|
|
|
mean value: 0.9894538606403013
|
|
|
|
key: test_fscore
|
|
value: [0.96666667 0.96551724 0.98305085 0.98305085 0.95081967 0.93333333
|
|
0.96666667 0.95798319 0.95652174 0.98275862]
|
|
|
|
mean value: 0.964636882818978
|
|
|
|
key: train_fscore
|
|
value: [0.98496241 0.99249531 0.9915493 0.9887218 0.9905838 0.98965193
|
|
0.98967136 0.98686679 0.9915493 0.98870056]
|
|
|
|
mean value: 0.989475256251039
|
|
|
|
key: test_precision
|
|
value: [0.95081967 0.98245614 0.98305085 0.98305085 0.92063492 0.91803279
|
|
0.95081967 0.95 0.98214286 1. ]
|
|
|
|
mean value: 0.962100774419145
|
|
|
|
key: train_precision
|
|
value: [0.98311445 0.98878505 0.98876404 0.98686679 0.9905838 0.9887218
|
|
0.98689139 0.98317757 0.98876404 0.98870056]
|
|
|
|
mean value: 0.9874369504377938
|
|
|
|
key: test_recall
|
|
value: [0.98305085 0.94915254 0.98305085 0.98305085 0.98305085 0.94915254
|
|
0.98305085 0.96610169 0.93220339 0.96610169]
|
|
|
|
mean value: 0.9677966101694916
|
|
|
|
key: train_recall
|
|
value: [0.98681733 0.99623352 0.99435028 0.9905838 0.9905838 0.9905838
|
|
0.99246704 0.9905838 0.99435028 0.98870056]
|
|
|
|
mean value: 0.9915254237288136
|
|
|
|
key: test_roc_auc
|
|
value: [0.96610169 0.96610169 0.98305085 0.98305085 0.94915254 0.93220339
|
|
0.96610169 0.95762712 0.95762712 0.98305085]
|
|
|
|
mean value: 0.964406779661017
|
|
|
|
key: train_roc_auc
|
|
value: [0.98493409 0.99246704 0.99152542 0.98870056 0.9905838 0.98964218
|
|
0.98964218 0.98681733 0.99152542 0.98870056]
|
|
|
|
mean value: 0.9894538606403013
|
|
|
|
key: test_jcc
|
|
value: [0.93548387 0.93333333 0.96666667 0.96666667 0.90625 0.875
|
|
0.93548387 0.91935484 0.91666667 0.96610169]
|
|
|
|
mean value: 0.9321007608893749
|
|
|
|
key: train_jcc
|
|
value: [0.97037037 0.98510242 0.98324022 0.97769517 0.98134328 0.97951583
|
|
0.9795539 0.97407407 0.98324022 0.97765363]
|
|
|
|
mean value: 0.9791789126405246
|
|
|
|
MCC on Blind test: 0.57
|
|
|
|
Accuracy on Blind test: 0.9
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01407814 0.01759863 0.01906228 0.02906275 0.01783109 0.01736927
|
|
0.01727223 0.01780033 0.01752615 0.01729298]
|
|
|
|
mean value: 0.018489384651184083
|
|
|
|
key: score_time
|
|
value: [0.01255727 0.01245141 0.01268125 0.01314044 0.01253462 0.012501
|
|
0.01250362 0.01250291 0.01250529 0.01252937]
|
|
|
|
mean value: 0.012590718269348145
|
|
|
|
key: test_mcc
|
|
value: [0.56223688 0.71196668 0.66446176 0.69006556 0.54623099 0.63632522
|
|
0.59356147 0.7013929 0.74672866 0.6486493 ]
|
|
|
|
mean value: 0.6501619424887987
|
|
|
|
key: train_mcc
|
|
value: [0.67685622 0.66195814 0.67264225 0.66042483 0.68502151 0.67085699
|
|
0.66907359 0.65462711 0.66446176 0.67442937]
|
|
|
|
mean value: 0.6690351773463729
|
|
|
|
key: test_accuracy
|
|
value: [0.77966102 0.8559322 0.83050847 0.83898305 0.77118644 0.81355932
|
|
0.79661017 0.84745763 0.87288136 0.8220339 ]
|
|
|
|
mean value: 0.8228813559322035
|
|
|
|
key: train_accuracy
|
|
value: [0.83709981 0.82956685 0.83521657 0.82862524 0.84180791 0.83427495
|
|
0.83333333 0.82580038 0.83050847 0.83615819]
|
|
|
|
mean value: 0.8332391713747646
|
|
|
|
key: test_fscore
|
|
value: [0.79032258 0.85470085 0.83870968 0.85271318 0.784 0.828125
|
|
0.8 0.85714286 0.86956522 0.832 ]
|
|
|
|
mean value: 0.8307279365594106
|
|
|
|
key: train_fscore
|
|
value: [0.84400361 0.83708371 0.84162896 0.83662478 0.84671533 0.84086799
|
|
0.8401084 0.83378257 0.83870968 0.8423913 ]
|
|
|
|
mean value: 0.8401916323800057
|
|
|
|
key: test_precision
|
|
value: [0.75384615 0.86206897 0.8 0.78571429 0.74242424 0.76811594
|
|
0.78688525 0.80597015 0.89285714 0.78787879]
|
|
|
|
mean value: 0.798576091542221
|
|
|
|
key: train_precision
|
|
value: [0.80968858 0.80172414 0.81010453 0.79931389 0.82123894 0.80869565
|
|
0.80729167 0.79725086 0.8 0.81151832]
|
|
|
|
mean value: 0.8066826583123691
|
|
|
|
key: test_recall
|
|
value: [0.83050847 0.84745763 0.88135593 0.93220339 0.83050847 0.89830508
|
|
0.81355932 0.91525424 0.84745763 0.88135593]
|
|
|
|
mean value: 0.8677966101694915
|
|
|
|
key: train_recall
|
|
value: [0.88135593 0.87570621 0.87570621 0.87758945 0.87382298 0.87570621
|
|
0.87570621 0.87382298 0.88135593 0.87570621]
|
|
|
|
mean value: 0.876647834274953
|
|
|
|
key: test_roc_auc
|
|
value: [0.77966102 0.8559322 0.83050847 0.83898305 0.77118644 0.81355932
|
|
0.79661017 0.84745763 0.87288136 0.8220339 ]
|
|
|
|
mean value: 0.8228813559322035
|
|
|
|
key: train_roc_auc
|
|
value: [0.83709981 0.82956685 0.83521657 0.82862524 0.84180791 0.83427495
|
|
0.83333333 0.82580038 0.83050847 0.83615819]
|
|
|
|
mean value: 0.8332391713747647
|
|
|
|
key: test_jcc
|
|
value: [0.65333333 0.74626866 0.72222222 0.74324324 0.64473684 0.70666667
|
|
0.66666667 0.75 0.76923077 0.71232877]
|
|
|
|
mean value: 0.711469716730787
|
|
|
|
key: train_jcc
|
|
value: [0.7301092 0.71981424 0.7265625 0.7191358 0.73417722 0.72542902
|
|
0.72429907 0.71494607 0.72222222 0.72769953]
|
|
|
|
mean value: 0.7244394869711428
|
|
|
|
MCC on Blind test: 0.25
|
|
|
|
Accuracy on Blind test: 0.74
|
|
|
|
Model_name: XGBoost
|
|
Model func: XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000...
|
|
interaction_constraints=None, learning_rate=None,
|
|
max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan,
|
|
monotone_constraints=None, n_estimators=100,
|
|
n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None,
|
|
reg_lambda=None, scale_pos_weight=None,
|
|
subsample=None, tree_method=None,
|
|
use_label_encoder=False,
|
|
validate_parameters=None, verbosity=0))])
|
|
|
|
key: fit_time
|
|
value: [0.37202024 0.41177177 0.16031575 0.56499028 1.1614809 1.28652477
|
|
0.15573382 0.15530157 0.16763544 0.17594767]
|
|
|
|
mean value: 0.4611722230911255
|
|
|
|
key: score_time
|
|
value: [0.01143909 0.01158333 0.01154232 0.01435089 0.01593494 0.01192594
|
|
0.01248026 0.01159501 0.01144981 0.01153636]
|
|
|
|
mean value: 0.012383794784545899
|
|
|
|
key: test_mcc
|
|
value: [0.93273945 0.89882165 0.96610169 0.96610169 0.8824975 0.86891154
|
|
0.86490385 0.86640023 0.96665725 0.98319208]
|
|
|
|
mean value: 0.9196326926495288
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.96610169 0.94915254 0.98305085 0.98305085 0.94067797 0.93220339
|
|
0.93220339 0.93220339 0.98305085 0.99152542]
|
|
|
|
mean value: 0.9593220338983051
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.96666667 0.95 0.98305085 0.98305085 0.94214876 0.93548387
|
|
0.93103448 0.93442623 0.98275862 0.99159664]
|
|
|
|
mean value: 0.9600216964492176
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.95081967 0.93442623 0.98305085 0.98305085 0.91935484 0.89230769
|
|
0.94736842 0.9047619 1. 0.98333333]
|
|
|
|
mean value: 0.9498473786719838
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.98305085 0.96610169 0.98305085 0.98305085 0.96610169 0.98305085
|
|
0.91525424 0.96610169 0.96610169 1. ]
|
|
|
|
mean value: 0.9711864406779661
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.96610169 0.94915254 0.98305085 0.98305085 0.94067797 0.93220339
|
|
0.93220339 0.93220339 0.98305085 0.99152542]
|
|
|
|
mean value: 0.9593220338983052
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.93548387 0.9047619 0.96666667 0.96666667 0.890625 0.87878788
|
|
0.87096774 0.87692308 0.96610169 0.98333333]
|
|
|
|
mean value: 0.9240317834958007
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.81
|
|
|
|
Accuracy on Blind test: 0.95
|
|
|
|
Model_name: LDA
|
|
Model func: LinearDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LinearDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.07431316 0.0785656 0.10630751 0.10243177 0.11637735 0.07578731
|
|
0.10591316 0.09836221 0.06769347 0.07256293]
|
|
|
|
mean value: 0.08983144760131836
|
|
|
|
key: score_time
|
|
value: [0.03841519 0.01995349 0.01984167 0.01250887 0.01528406 0.01276875
|
|
0.0261066 0.02066016 0.0127275 0.01247048]
|
|
|
|
mean value: 0.019073677062988282
|
|
|
|
key: test_mcc
|
|
value: [0.88762536 0.79844727 0.88148255 0.91855865 0.71692818 0.80076161
|
|
0.78067087 0.8824975 0.84757938 0.9003767 ]
|
|
|
|
mean value: 0.8414928085865973
|
|
|
|
key: train_mcc
|
|
value: [0.90599872 0.91161871 0.90615304 0.91161871 0.92683147 0.91929626
|
|
0.91545069 0.89853456 0.90799344 0.90045822]
|
|
|
|
mean value: 0.9103953811037858
|
|
|
|
key: test_accuracy
|
|
value: [0.94067797 0.89830508 0.94067797 0.95762712 0.8559322 0.89830508
|
|
0.88983051 0.94067797 0.92372881 0.94915254]
|
|
|
|
mean value: 0.9194915254237288
|
|
|
|
key: train_accuracy
|
|
value: [0.95291902 0.95574388 0.95291902 0.95574388 0.96327684 0.95951036
|
|
0.95762712 0.94915254 0.95386064 0.95009416]
|
|
|
|
mean value: 0.9550847457627119
|
|
|
|
key: test_fscore
|
|
value: [0.944 0.90163934 0.94017094 0.95934959 0.864 0.90322581
|
|
0.89256198 0.94214876 0.92436975 0.95081967]
|
|
|
|
mean value: 0.9222285848212743
|
|
|
|
key: train_fscore
|
|
value: [0.95335821 0.95611578 0.9535316 0.95611578 0.96372093 0.96
|
|
0.95806151 0.94972067 0.9544186 0.95069767]
|
|
|
|
mean value: 0.9555740756237653
|
|
|
|
key: test_precision
|
|
value: [0.89393939 0.87301587 0.94827586 0.921875 0.81818182 0.86153846
|
|
0.87096774 0.91935484 0.91666667 0.92063492]
|
|
|
|
mean value: 0.8944450576691261
|
|
|
|
key: train_precision
|
|
value: [0.94454713 0.94814815 0.9412844 0.94814815 0.95220588 0.94852941
|
|
0.94833948 0.93922652 0.94301471 0.93933824]
|
|
|
|
mean value: 0.9452782072927294
|
|
|
|
key: test_recall
|
|
value: [1. 0.93220339 0.93220339 1. 0.91525424 0.94915254
|
|
0.91525424 0.96610169 0.93220339 0.98305085]
|
|
|
|
mean value: 0.9525423728813559
|
|
|
|
key: train_recall
|
|
value: [0.96233522 0.96421846 0.96610169 0.96421846 0.97551789 0.97175141
|
|
0.96798493 0.96045198 0.96610169 0.96233522]
|
|
|
|
mean value: 0.9661016949152542
|
|
|
|
key: test_roc_auc
|
|
value: [0.94067797 0.89830508 0.94067797 0.95762712 0.8559322 0.89830508
|
|
0.88983051 0.94067797 0.92372881 0.94915254]
|
|
|
|
mean value: 0.9194915254237288
|
|
|
|
key: train_roc_auc
|
|
value: [0.95291902 0.95574388 0.95291902 0.95574388 0.96327684 0.95951036
|
|
0.95762712 0.94915254 0.95386064 0.95009416]
|
|
|
|
mean value: 0.9550847457627119
|
|
|
|
key: test_jcc
|
|
value: [0.89393939 0.82089552 0.88709677 0.921875 0.76056338 0.82352941
|
|
0.80597015 0.890625 0.859375 0.90625 ]
|
|
|
|
mean value: 0.8570119631821129
|
|
|
|
key: train_jcc
|
|
value: [0.91087344 0.91592129 0.91119005 0.91592129 0.92998205 0.92307692
|
|
0.91949911 0.90425532 0.91281139 0.90602837]
|
|
|
|
mean value: 0.914955922074459
|
|
|
|
MCC on Blind test: 0.61
|
|
|
|
Accuracy on Blind test: 0.9
|
|
|
|
Model_name: Multinomial
|
|
Model func: MultinomialNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MultinomialNB())])
|
|
|
|
key: fit_time
|
|
value: [0.03478527 0.01645541 0.01644373 0.01668286 0.01657367 0.01646161
|
|
0.01675296 0.01681471 0.01642871 0.01651549]
|
|
|
|
mean value: 0.01839144229888916
|
|
|
|
key: score_time
|
|
value: [0.01238036 0.0122776 0.01227784 0.01224566 0.01230073 0.0122776
|
|
0.01266789 0.01228118 0.0122366 0.01230717]
|
|
|
|
mean value: 0.012325263023376465
|
|
|
|
key: test_mcc
|
|
value: [0.51209156 0.5424508 0.54307539 0.69531486 0.49180807 0.59356147
|
|
0.62747926 0.61025715 0.71443451 0.61095981]
|
|
|
|
mean value: 0.5941432880816522
|
|
|
|
key: train_mcc
|
|
value: [0.629003 0.59515529 0.61240469 0.60264615 0.62712309 0.61958569
|
|
0.59887961 0.61406774 0.59510463 0.59328768]
|
|
|
|
mean value: 0.6087257580291437
|
|
|
|
key: test_accuracy
|
|
value: [0.75423729 0.77118644 0.77118644 0.84745763 0.74576271 0.79661017
|
|
0.81355932 0.80508475 0.8559322 0.80508475]
|
|
|
|
mean value: 0.7966101694915254
|
|
|
|
key: train_accuracy
|
|
value: [0.81450094 0.79755179 0.80602637 0.80131827 0.81355932 0.80979284
|
|
0.79943503 0.80696798 0.79755179 0.79661017]
|
|
|
|
mean value: 0.804331450094162
|
|
|
|
key: test_fscore
|
|
value: [0.768 0.76923077 0.76521739 0.85 0.75 0.8
|
|
0.81034483 0.80672269 0.84955752 0.80991736]
|
|
|
|
mean value: 0.7978990554692749
|
|
|
|
key: train_fscore
|
|
value: [0.81432611 0.79620853 0.80925926 0.80187793 0.81320755 0.80979284
|
|
0.8 0.80894688 0.79774224 0.79813084]
|
|
|
|
mean value: 0.8049492180624281
|
|
|
|
key: test_precision
|
|
value: [0.72727273 0.77586207 0.78571429 0.83606557 0.73770492 0.78688525
|
|
0.8245614 0.8 0.88888889 0.79032258]
|
|
|
|
mean value: 0.795327769270027
|
|
|
|
key: train_precision
|
|
value: [0.81509434 0.80152672 0.79599271 0.79962547 0.8147448 0.80979284
|
|
0.79775281 0.80073801 0.79699248 0.79220779]
|
|
|
|
mean value: 0.8024467974353262
|
|
|
|
key: test_recall
|
|
value: [0.81355932 0.76271186 0.74576271 0.86440678 0.76271186 0.81355932
|
|
0.79661017 0.81355932 0.81355932 0.83050847]
|
|
|
|
mean value: 0.8016949152542373
|
|
|
|
key: train_recall
|
|
value: [0.81355932 0.79096045 0.82297552 0.80414313 0.81167608 0.80979284
|
|
0.80225989 0.8173258 0.79849341 0.80414313]
|
|
|
|
mean value: 0.8075329566854991
|
|
|
|
key: test_roc_auc
|
|
value: [0.75423729 0.77118644 0.77118644 0.84745763 0.74576271 0.79661017
|
|
0.81355932 0.80508475 0.8559322 0.80508475]
|
|
|
|
mean value: 0.7966101694915254
|
|
|
|
key: train_roc_auc
|
|
value: [0.81450094 0.79755179 0.80602637 0.80131827 0.81355932 0.80979284
|
|
0.79943503 0.80696798 0.79755179 0.79661017]
|
|
|
|
mean value: 0.804331450094162
|
|
|
|
key: test_jcc
|
|
value: [0.62337662 0.625 0.61971831 0.73913043 0.6 0.66666667
|
|
0.68115942 0.67605634 0.73846154 0.68055556]
|
|
|
|
mean value: 0.6650124887020171
|
|
|
|
key: train_jcc
|
|
value: [0.68680445 0.66141732 0.67962675 0.669279 0.68521463 0.68037975
|
|
0.66666667 0.67918623 0.66353678 0.66407465]
|
|
|
|
mean value: 0.6736186215487183
|
|
|
|
MCC on Blind test: 0.49
|
|
|
|
Accuracy on Blind test: 0.82
|
|
|
|
Model_name: Passive Aggresive
|
|
Model func: PassiveAggressiveClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
PassiveAggressiveClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.04658747 0.03060317 0.02728319 0.03094292 0.03298497 0.05013299
|
|
0.05351949 0.05690956 0.05424237 0.04046535]
|
|
|
|
mean value: 0.04236714839935303
|
|
|
|
key: score_time
|
|
value: [0.0144105 0.01238847 0.01245499 0.01262283 0.01258159 0.01322532
|
|
0.01283717 0.01502204 0.0218544 0.01317573]
|
|
|
|
mean value: 0.014057302474975586
|
|
|
|
key: test_mcc
|
|
value: [0.81649658 0.74586985 0.88148255 0.91643971 0.74437255 0.68041382
|
|
0.83483657 0.69504805 0.72970373 0.72980045]
|
|
|
|
mean value: 0.7774463875714012
|
|
|
|
key: train_mcc
|
|
value: [0.89545371 0.85007077 0.85036085 0.82664841 0.87552418 0.82347825
|
|
0.80830501 0.74151322 0.63328936 0.79277693]
|
|
|
|
mean value: 0.8097420685120778
|
|
|
|
key: test_accuracy
|
|
value: [0.90677966 0.87288136 0.94067797 0.95762712 0.86440678 0.83898305
|
|
0.91525424 0.83898305 0.84745763 0.8559322 ]
|
|
|
|
mean value: 0.8838983050847458
|
|
|
|
key: train_accuracy
|
|
value: [0.9472693 0.92467043 0.92467043 0.91148776 0.93596987 0.90960452
|
|
0.89830508 0.85875706 0.78625235 0.88794727]
|
|
|
|
mean value: 0.8984934086629002
|
|
|
|
key: test_fscore
|
|
value: [0.91056911 0.87179487 0.94117647 0.95652174 0.87692308 0.83185841
|
|
0.91935484 0.81904762 0.86764706 0.87022901]
|
|
|
|
mean value: 0.8865122195421735
|
|
|
|
key: train_fscore
|
|
value: [0.94843462 0.92307692 0.92647059 0.90711462 0.93873874 0.9047619
|
|
0.90625 0.83836207 0.82389449 0.89837746]
|
|
|
|
mean value: 0.9015481417772753
|
|
|
|
key: test_precision
|
|
value: [0.875 0.87931034 0.93333333 0.98214286 0.8028169 0.87037037
|
|
0.87692308 0.93478261 0.76623377 0.79166667]
|
|
|
|
mean value: 0.871257992560176
|
|
|
|
key: train_precision
|
|
value: [0.92792793 0.94302554 0.9048474 0.95426195 0.89982729 0.95597484
|
|
0.84057971 0.97984887 0.7005277 0.821875 ]
|
|
|
|
mean value: 0.892869623155811
|
|
|
|
key: test_recall
|
|
value: [0.94915254 0.86440678 0.94915254 0.93220339 0.96610169 0.79661017
|
|
0.96610169 0.72881356 1. 0.96610169]
|
|
|
|
mean value: 0.911864406779661
|
|
|
|
key: train_recall
|
|
value: [0.96986817 0.9039548 0.94915254 0.86440678 0.98116761 0.85875706
|
|
0.98305085 0.73258004 1. 0.9905838 ]
|
|
|
|
mean value: 0.9233521657250471
|
|
|
|
key: test_roc_auc
|
|
value: [0.90677966 0.87288136 0.94067797 0.95762712 0.86440678 0.83898305
|
|
0.91525424 0.83898305 0.84745763 0.8559322 ]
|
|
|
|
mean value: 0.8838983050847458
|
|
|
|
key: train_roc_auc
|
|
value: [0.9472693 0.92467043 0.92467043 0.91148776 0.93596987 0.90960452
|
|
0.89830508 0.85875706 0.78625235 0.88794727]
|
|
|
|
mean value: 0.8984934086629002
|
|
|
|
key: test_jcc
|
|
value: [0.8358209 0.77272727 0.88888889 0.91666667 0.78082192 0.71212121
|
|
0.85074627 0.69354839 0.76623377 0.77027027]
|
|
|
|
mean value: 0.7987845545992175
|
|
|
|
key: train_jcc
|
|
value: [0.90192644 0.85714286 0.8630137 0.83001808 0.88455008 0.82608696
|
|
0.82857143 0.72170686 0.7005277 0.81550388]
|
|
|
|
mean value: 0.8229047998790558
|
|
|
|
MCC on Blind test: 0.57
|
|
|
|
Accuracy on Blind test: 0.86
|
|
|
|
Model_name: Stochastic GDescent
|
|
Model func: SGDClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SGDClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.04099655 0.03378463 0.03922915 0.03761435 0.03227115 0.03588915
|
|
0.03760219 0.03354454 0.04781771 0.03898621]
|
|
|
|
mean value: 0.03777356147766113
|
|
|
|
key: score_time
|
|
value: [0.01846004 0.01262856 0.01286602 0.01281357 0.0128808 0.01260662
|
|
0.01285553 0.012429 0.01237774 0.01896715]
|
|
|
|
mean value: 0.01388850212097168
|
|
|
|
key: test_mcc
|
|
value: [0.80830501 0.65292863 0.80076161 0.64937412 0.62577865 0.76668665
|
|
0.83242375 0.18731716 0.75907212 0.8136762 ]
|
|
|
|
mean value: 0.6896323894317706
|
|
|
|
key: train_mcc
|
|
value: [0.86970353 0.67807537 0.81544053 0.57662541 0.70611577 0.90453403
|
|
0.80243482 0.31596468 0.78753987 0.88162018]
|
|
|
|
mean value: 0.733805420830759
|
|
|
|
key: test_accuracy
|
|
value: [0.89830508 0.80508475 0.89830508 0.79661017 0.78813559 0.88135593
|
|
0.91525424 0.53389831 0.87288136 0.90677966]
|
|
|
|
mean value: 0.8296610169491525
|
|
|
|
key: train_accuracy
|
|
value: [0.93314501 0.81826742 0.9039548 0.74952919 0.83333333 0.9519774
|
|
0.89359699 0.59227872 0.88700565 0.94067797]
|
|
|
|
mean value: 0.8503766478342749
|
|
|
|
key: test_fscore
|
|
value: [0.90625 0.7628866 0.89285714 0.83098592 0.82269504 0.88709677
|
|
0.91803279 0.12698413 0.85981308 0.90756303]
|
|
|
|
mean value: 0.7915164489134392
|
|
|
|
key: train_fscore
|
|
value: [0.93597836 0.78043231 0.8969697 0.7996988 0.85691188 0.95282146
|
|
0.90300429 0.31378764 0.87551867 0.94139535]
|
|
|
|
mean value: 0.8256518457224447
|
|
|
|
key: test_precision
|
|
value: [0.84057971 0.97368421 0.94339623 0.71084337 0.70731707 0.84615385
|
|
0.88888889 1. 0.95833333 0.9 ]
|
|
|
|
mean value: 0.8769196662127113
|
|
|
|
key: train_precision
|
|
value: [0.89792388 0.98563218 0.96732026 0.66624843 0.75070822 0.93636364
|
|
0.829653 0.99 0.97459584 0.93014706]
|
|
|
|
mean value: 0.8928592502683212
|
|
|
|
key: test_recall
|
|
value: [0.98305085 0.62711864 0.84745763 1. 0.98305085 0.93220339
|
|
0.94915254 0.06779661 0.77966102 0.91525424]
|
|
|
|
mean value: 0.8084745762711865
|
|
|
|
key: train_recall
|
|
value: [0.97740113 0.64595104 0.83615819 1. 0.99811676 0.96986817
|
|
0.9905838 0.18644068 0.79472693 0.95291902]
|
|
|
|
mean value: 0.835216572504708
|
|
|
|
key: test_roc_auc
|
|
value: [0.89830508 0.80508475 0.89830508 0.79661017 0.78813559 0.88135593
|
|
0.91525424 0.53389831 0.87288136 0.90677966]
|
|
|
|
mean value: 0.8296610169491526
|
|
|
|
key: train_roc_auc
|
|
value: [0.93314501 0.81826742 0.9039548 0.74952919 0.83333333 0.9519774
|
|
0.89359699 0.59227872 0.88700565 0.94067797]
|
|
|
|
mean value: 0.8503766478342749
|
|
|
|
key: test_jcc
|
|
value: [0.82857143 0.61666667 0.80645161 0.71084337 0.69879518 0.79710145
|
|
0.84848485 0.06779661 0.75409836 0.83076923]
|
|
|
|
mean value: 0.6959578761712859
|
|
|
|
key: train_jcc
|
|
value: [0.87966102 0.63992537 0.81318681 0.66624843 0.74964639 0.90989399
|
|
0.82316119 0.18609023 0.77859779 0.88927944]
|
|
|
|
mean value: 0.733569065954246
|
|
|
|
MCC on Blind test: 0.75
|
|
|
|
Accuracy on Blind test: 0.94
|
|
|
|
Model_name: AdaBoost Classifier
|
|
Model func: AdaBoostClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', AdaBoostClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.46183872 0.49180198 0.46990943 0.43458843 0.42780256 0.43104339
|
|
0.44664502 0.4341023 0.42976546 0.43114185]
|
|
|
|
mean value: 0.4458639144897461
|
|
|
|
key: score_time
|
|
value: [0.0173285 0.01978135 0.01621866 0.01623845 0.01628375 0.01638222
|
|
0.01610398 0.01675797 0.01665401 0.0163188 ]
|
|
|
|
mean value: 0.016806769371032714
|
|
|
|
key: test_mcc
|
|
value: [0.95038193 0.88148255 0.91538573 0.91538573 0.81461308 0.79844727
|
|
0.84757938 0.83483657 0.88148255 0.91538573]
|
|
|
|
mean value: 0.8754980525303137
|
|
|
|
key: train_mcc
|
|
value: [0.95867755 0.95859594 0.96422017 0.96612911 0.96989569 0.97551962
|
|
0.95497162 0.95486322 0.95862994 0.96045368]
|
|
|
|
mean value: 0.9621956524507043
|
|
|
|
key: test_accuracy
|
|
value: [0.97457627 0.94067797 0.95762712 0.95762712 0.90677966 0.89830508
|
|
0.92372881 0.91525424 0.94067797 0.95762712]
|
|
|
|
mean value: 0.9372881355932203
|
|
|
|
key: train_accuracy
|
|
value: [0.97928437 0.97928437 0.98210923 0.98305085 0.98493409 0.98775895
|
|
0.97740113 0.97740113 0.97928437 0.98022599]
|
|
|
|
mean value: 0.9810734463276836
|
|
|
|
key: test_fscore
|
|
value: [0.97520661 0.94017094 0.95726496 0.95726496 0.90909091 0.90163934
|
|
0.92307692 0.91935484 0.94117647 0.95726496]
|
|
|
|
mean value: 0.9381510909264099
|
|
|
|
key: train_fscore
|
|
value: [0.97943925 0.9793621 0.98212606 0.98311445 0.98499062 0.98777046
|
|
0.97761194 0.97752809 0.97940075 0.98024459]
|
|
|
|
mean value: 0.981158830863148
|
|
|
|
key: test_precision
|
|
value: [0.9516129 0.94827586 0.96551724 0.96551724 0.88709677 0.87301587
|
|
0.93103448 0.87692308 0.93333333 0.96551724]
|
|
|
|
mean value: 0.9297844029657155
|
|
|
|
key: train_precision
|
|
value: [0.97217069 0.97570093 0.98120301 0.97943925 0.98130841 0.98684211
|
|
0.96857671 0.97206704 0.97392924 0.97932331]
|
|
|
|
mean value: 0.9770560691041761
|
|
|
|
key: test_recall
|
|
value: [1. 0.93220339 0.94915254 0.94915254 0.93220339 0.93220339
|
|
0.91525424 0.96610169 0.94915254 0.94915254]
|
|
|
|
mean value: 0.9474576271186441
|
|
|
|
key: train_recall
|
|
value: [0.98681733 0.98305085 0.98305085 0.98681733 0.98870056 0.98870056
|
|
0.98681733 0.98305085 0.98493409 0.98116761]
|
|
|
|
mean value: 0.9853107344632769
|
|
|
|
key: test_roc_auc
|
|
value: [0.97457627 0.94067797 0.95762712 0.95762712 0.90677966 0.89830508
|
|
0.92372881 0.91525424 0.94067797 0.95762712]
|
|
|
|
mean value: 0.9372881355932204
|
|
|
|
key: train_roc_auc
|
|
value: [0.97928437 0.97928437 0.98210923 0.98305085 0.98493409 0.98775895
|
|
0.97740113 0.97740113 0.97928437 0.98022599]
|
|
|
|
mean value: 0.9810734463276836
|
|
|
|
key: test_jcc
|
|
value: [0.9516129 0.88709677 0.91803279 0.91803279 0.83333333 0.82089552
|
|
0.85714286 0.85074627 0.88888889 0.91803279]
|
|
|
|
mean value: 0.8843814908484948
|
|
|
|
key: train_jcc
|
|
value: [0.95970696 0.95955882 0.96487985 0.96678967 0.97042514 0.97583643
|
|
0.95620438 0.95604396 0.95963303 0.96125461]
|
|
|
|
mean value: 0.9630332848792733
|
|
|
|
MCC on Blind test: 0.7
|
|
|
|
Accuracy on Blind test: 0.93
|
|
|
|
Model_name: Bagging Classifier
|
|
Model func: BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
BaggingClassifier(n_jobs=10, oob_score=True,
|
|
random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.25657034 0.25415993 0.27294445 0.24814987 0.2596736 0.25744009
|
|
0.2347095 0.26354051 0.25839305 0.26013041]
|
|
|
|
mean value: 0.2565711736679077
|
|
|
|
key: score_time
|
|
value: [0.03586483 0.02136898 0.04403019 0.03612494 0.04004169 0.02626038
|
|
0.01989675 0.0385263 0.02476692 0.01999259]
|
|
|
|
mean value: 0.030687355995178224
|
|
|
|
key: test_mcc
|
|
value: [0.95038193 0.88148255 0.93220339 0.95038193 0.83483657 0.84855529
|
|
0.86490385 0.83098605 0.9003767 0.88148255]
|
|
|
|
mean value: 0.8875590826206692
|
|
|
|
key: train_mcc
|
|
value: [0.99435205 0.99624059 0.99059961 0.99062772 0.99247408 0.99436615
|
|
0.98319208 0.99811853 0.99059961 0.98870758]
|
|
|
|
mean value: 0.9919278008353247
|
|
|
|
key: test_accuracy
|
|
value: [0.97457627 0.94067797 0.96610169 0.97457627 0.91525424 0.92372881
|
|
0.93220339 0.91525424 0.94915254 0.94067797]
|
|
|
|
mean value: 0.9432203389830508
|
|
|
|
key: train_accuracy
|
|
value: [0.99717514 0.99811676 0.9952919 0.9952919 0.99623352 0.99717514
|
|
0.99152542 0.99905838 0.9952919 0.99435028]
|
|
|
|
mean value: 0.9959510357815442
|
|
|
|
key: test_fscore
|
|
value: [0.97520661 0.94117647 0.96610169 0.97391304 0.91935484 0.92561983
|
|
0.93103448 0.91666667 0.94736842 0.94017094]
|
|
|
|
mean value: 0.9436613004621278
|
|
|
|
key: train_fscore
|
|
value: [0.99717248 0.99811321 0.99527856 0.99526963 0.99622642 0.99716714
|
|
0.99145299 0.99905927 0.99527856 0.9943609 ]
|
|
|
|
mean value: 0.9959379160580135
|
|
|
|
key: test_precision
|
|
value: [0.9516129 0.93333333 0.96610169 1. 0.87692308 0.90322581
|
|
0.94736842 0.90163934 0.98181818 0.94827586]
|
|
|
|
mean value: 0.9410298624051158
|
|
|
|
key: train_precision
|
|
value: [0.99811321 1. 0.99810606 1. 0.99810964 1.
|
|
1. 0.9981203 0.99810606 0.99249531]
|
|
|
|
mean value: 0.9983050579911409
|
|
|
|
key: test_recall
|
|
value: [1. 0.94915254 0.96610169 0.94915254 0.96610169 0.94915254
|
|
0.91525424 0.93220339 0.91525424 0.93220339]
|
|
|
|
mean value: 0.9474576271186441
|
|
|
|
key: train_recall
|
|
value: [0.99623352 0.99623352 0.99246704 0.9905838 0.99435028 0.99435028
|
|
0.98305085 1. 0.99246704 0.99623352]
|
|
|
|
mean value: 0.9935969868173258
|
|
|
|
key: test_roc_auc
|
|
value: [0.97457627 0.94067797 0.96610169 0.97457627 0.91525424 0.92372881
|
|
0.93220339 0.91525424 0.94915254 0.94067797]
|
|
|
|
mean value: 0.9432203389830509
|
|
|
|
key: train_roc_auc
|
|
value: [0.99717514 0.99811676 0.9952919 0.9952919 0.99623352 0.99717514
|
|
0.99152542 0.99905838 0.9952919 0.99435028]
|
|
|
|
mean value: 0.9959510357815443
|
|
|
|
key: test_jcc
|
|
value: [0.9516129 0.88888889 0.93442623 0.94915254 0.85074627 0.86153846
|
|
0.87096774 0.84615385 0.9 0.88709677]
|
|
|
|
mean value: 0.894058365647383
|
|
|
|
key: train_jcc
|
|
value: [0.9943609 0.99623352 0.9906015 0.9905838 0.9924812 0.99435028
|
|
0.98305085 0.9981203 0.9906015 0.98878505]
|
|
|
|
mean value: 0.9919168916006686
|
|
|
|
MCC on Blind test: 0.76
|
|
|
|
Accuracy on Blind test: 0.94
|
|
|
|
Model_name: Gaussian Process
|
|
Model func: GaussianProcessClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianProcessClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.83014345 0.64683604 0.56206465 0.71880174 0.61612749 0.58552098
|
|
0.72379494 0.82546329 0.75197577 0.59547186]
|
|
|
|
mean value: 0.6856200218200683
|
|
|
|
key: score_time
|
|
value: [0.05363488 0.04660821 0.0509665 0.02549338 0.04703069 0.04637933
|
|
0.02732182 0.04765701 0.03586054 0.02568507]
|
|
|
|
mean value: 0.040663743019104005
|
|
|
|
key: test_mcc
|
|
value: [0.81934649 0.83483657 0.83483657 0.85348593 0.68278874 0.76315046
|
|
0.80403577 0.76447079 0.83098605 0.76315046]
|
|
|
|
mean value: 0.7951087827335654
|
|
|
|
key: train_mcc
|
|
value: [0.94919462 0.95491064 0.94552189 0.93805441 0.94546822 0.94367018
|
|
0.94737683 0.94923503 0.95320473 0.94928891]
|
|
|
|
mean value: 0.9475925441433771
|
|
|
|
key: test_accuracy
|
|
value: [0.90677966 0.91525424 0.91525424 0.92372881 0.83898305 0.88135593
|
|
0.89830508 0.88135593 0.91525424 0.88135593]
|
|
|
|
mean value: 0.8957627118644068
|
|
|
|
key: train_accuracy
|
|
value: [0.97457627 0.97740113 0.97269303 0.96892655 0.97269303 0.97175141
|
|
0.97363465 0.97457627 0.97645951 0.97457627]
|
|
|
|
mean value: 0.9737288135593221
|
|
|
|
key: test_fscore
|
|
value: [0.912 0.91935484 0.91935484 0.928 0.848 0.88333333
|
|
0.9047619 0.8852459 0.9137931 0.87931034]
|
|
|
|
mean value: 0.8993154265429799
|
|
|
|
key: train_fscore
|
|
value: [0.97469541 0.97757009 0.9729225 0.96924511 0.97287184 0.97201493
|
|
0.97383178 0.97474275 0.97674419 0.97478992]
|
|
|
|
mean value: 0.9739428506818061
|
|
|
|
key: test_precision
|
|
value: [0.86363636 0.87692308 0.87692308 0.87878788 0.8030303 0.86885246
|
|
0.85074627 0.85714286 0.92982456 0.89473684]
|
|
|
|
mean value: 0.8700603687625438
|
|
|
|
key: train_precision
|
|
value: [0.97014925 0.9703154 0.96481481 0.95940959 0.96654275 0.96303142
|
|
0.96660482 0.96840149 0.96507353 0.96666667]
|
|
|
|
mean value: 0.9661009742563458
|
|
|
|
key: test_recall
|
|
value: [0.96610169 0.96610169 0.96610169 0.98305085 0.89830508 0.89830508
|
|
0.96610169 0.91525424 0.89830508 0.86440678]
|
|
|
|
mean value: 0.9322033898305084
|
|
|
|
key: train_recall
|
|
value: [0.97928437 0.98493409 0.98116761 0.97928437 0.97928437 0.98116761
|
|
0.98116761 0.98116761 0.98870056 0.98305085]
|
|
|
|
mean value: 0.9819209039548022
|
|
|
|
key: test_roc_auc
|
|
value: [0.90677966 0.91525424 0.91525424 0.92372881 0.83898305 0.88135593
|
|
0.89830508 0.88135593 0.91525424 0.88135593]
|
|
|
|
mean value: 0.8957627118644068
|
|
|
|
key: train_roc_auc
|
|
value: [0.97457627 0.97740113 0.97269303 0.96892655 0.97269303 0.97175141
|
|
0.97363465 0.97457627 0.97645951 0.97457627]
|
|
|
|
mean value: 0.9737288135593221
|
|
|
|
key: test_jcc
|
|
value: [0.83823529 0.85074627 0.85074627 0.86567164 0.73611111 0.79104478
|
|
0.82608696 0.79411765 0.84126984 0.78461538]
|
|
|
|
mean value: 0.8178645189918428
|
|
|
|
key: train_jcc
|
|
value: [0.95063985 0.95612431 0.94727273 0.9403255 0.94717668 0.94555354
|
|
0.94899818 0.95072993 0.95454545 0.95081967]
|
|
|
|
mean value: 0.9492185848842221
|
|
|
|
MCC on Blind test: 0.4
|
|
|
|
Accuracy on Blind test: 0.86
|
|
|
|
Model_name: Gradient Boosting
|
|
Model func: GradientBoostingClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GradientBoostingClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [2.20070601 2.270854 2.22921824 1.99573541 1.93387723 1.94780111
|
|
2.09908342 2.27794266 2.29656458 2.32174754]
|
|
|
|
mean value: 2.1573530197143556
|
|
|
|
key: score_time
|
|
value: [0.01678896 0.0120194 0.01040173 0.00970078 0.00974345 0.00976443
|
|
0.01433015 0.01530862 0.0113833 0.01660562]
|
|
|
|
mean value: 0.012604641914367675
|
|
|
|
key: test_mcc
|
|
value: [0.95038193 0.93220339 0.94928891 0.96610169 0.86891154 0.83242375
|
|
0.89882165 0.83242375 0.93220339 0.96610169]
|
|
|
|
mean value: 0.9128861677880097
|
|
|
|
key: train_mcc
|
|
value: [0.99435205 0.98870758 0.99246704 0.99435205 0.99246704 0.99246704
|
|
0.99623352 0.99247408 0.99246704 0.99058556]
|
|
|
|
mean value: 0.992657300840727
|
|
|
|
key: test_accuracy
|
|
value: [0.97457627 0.96610169 0.97457627 0.98305085 0.93220339 0.91525424
|
|
0.94915254 0.91525424 0.96610169 0.98305085]
|
|
|
|
mean value: 0.9559322033898305
|
|
|
|
key: train_accuracy
|
|
value: [0.99717514 0.99435028 0.99623352 0.99717514 0.99623352 0.99623352
|
|
0.99811676 0.99623352 0.99623352 0.9952919 ]
|
|
|
|
mean value: 0.9963276836158191
|
|
|
|
key: test_fscore
|
|
value: [0.97520661 0.96610169 0.97478992 0.98305085 0.93548387 0.91803279
|
|
0.94827586 0.91803279 0.96610169 0.98305085]
|
|
|
|
mean value: 0.9568126919089597
|
|
|
|
key: train_fscore
|
|
value: [0.9971778 0.99433962 0.99623352 0.9971778 0.99623352 0.99623352
|
|
0.99811676 0.9962406 0.99623352 0.99528746]
|
|
|
|
mean value: 0.9963274133624827
|
|
|
|
key: test_precision
|
|
value: [0.9516129 0.96610169 0.96666667 0.98305085 0.89230769 0.88888889
|
|
0.96491228 0.88888889 0.96610169 0.98305085]
|
|
|
|
mean value: 0.945158240542546
|
|
|
|
key: train_precision
|
|
value: [0.9962406 0.99621928 0.99623352 0.9962406 0.99623352 0.99623352
|
|
0.99811676 0.99437148 0.99623352 0.99622642]
|
|
|
|
mean value: 0.9962349229399361
|
|
|
|
key: test_recall
|
|
value: [1. 0.96610169 0.98305085 0.98305085 0.98305085 0.94915254
|
|
0.93220339 0.94915254 0.96610169 0.98305085]
|
|
|
|
mean value: 0.9694915254237289
|
|
|
|
key: train_recall
|
|
value: [0.99811676 0.99246704 0.99623352 0.99811676 0.99623352 0.99623352
|
|
0.99811676 0.99811676 0.99623352 0.99435028]
|
|
|
|
mean value: 0.996421845574388
|
|
|
|
key: test_roc_auc
|
|
value: [0.97457627 0.96610169 0.97457627 0.98305085 0.93220339 0.91525424
|
|
0.94915254 0.91525424 0.96610169 0.98305085]
|
|
|
|
mean value: 0.9559322033898305
|
|
|
|
key: train_roc_auc
|
|
value: [0.99717514 0.99435028 0.99623352 0.99717514 0.99623352 0.99623352
|
|
0.99811676 0.99623352 0.99623352 0.9952919 ]
|
|
|
|
mean value: 0.9963276836158191
|
|
|
|
key: test_jcc
|
|
value: [0.9516129 0.93442623 0.95081967 0.96666667 0.87878788 0.84848485
|
|
0.90163934 0.84848485 0.93442623 0.96666667]
|
|
|
|
mean value: 0.9182015287726552
|
|
|
|
key: train_jcc
|
|
value: [0.99437148 0.98874296 0.99249531 0.99437148 0.99249531 0.99249531
|
|
0.9962406 0.99250936 0.99249531 0.99061914]
|
|
|
|
mean value: 0.9926836268739602
|
|
|
|
MCC on Blind test: 0.75
|
|
|
|
Accuracy on Blind test: 0.94
|
|
|
|
Model_name: QDA
|
|
Model func: QuadraticDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', QuadraticDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.05143738 0.0859158 0.0742116 0.05209136 0.05726671 0.10368776
|
|
0.08659029 0.10663414 0.08550072 0.11223626]
|
|
|
|
mean value: 0.08155720233917237
|
|
|
|
key: score_time
|
|
value: [0.01780772 0.01327252 0.0135088 0.035115 0.01483512 0.01323867
|
|
0.01493788 0.02044654 0.0132556 0.01528311]
|
|
|
|
mean value: 0.017170095443725587
|
|
|
|
key: test_mcc
|
|
value: [0.7710996 0.87246434 0.71534603 0.84270097 0.72970373 0.71611487
|
|
0.82807867 0.78513588 0.78513588 0.87246434]
|
|
|
|
mean value: 0.7918244328408633
|
|
|
|
key: train_mcc
|
|
value: [0.81361651 0.80723786 0.83293451 0.82969529 0.80564777 0.79141643
|
|
0.82163183 0.81361651 0.78044276 0.81201909]
|
|
|
|
mean value: 0.8108258578842653
|
|
|
|
key: test_accuracy
|
|
value: [0.87288136 0.93220339 0.84745763 0.91525424 0.84745763 0.83898305
|
|
0.90677966 0.88135593 0.88135593 0.93220339]
|
|
|
|
mean value: 0.885593220338983
|
|
|
|
key: train_accuracy
|
|
value: [0.89830508 0.89453861 0.90960452 0.90772128 0.89359699 0.88512241
|
|
0.90301318 0.89830508 0.87853107 0.89736347]
|
|
|
|
mean value: 0.8966101694915254
|
|
|
|
key: test_fscore
|
|
value: [0.88721805 0.93650794 0.86363636 0.921875 0.86764706 0.86131387
|
|
0.91472868 0.89393939 0.89393939 0.93650794]
|
|
|
|
mean value: 0.8977313679251017
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
|
|
key: train_fscore
|
|
value: [0.90769231 0.90459966 0.91709845 0.91551724 0.90382979 0.89695946
|
|
0.91158798 0.90769231 0.89168766 0.90691716]
|
|
|
|
mean value: 0.9063582013417525
|
|
|
|
key: test_precision
|
|
value: [0.7972973 0.88059701 0.78082192 0.85507246 0.76623377 0.75641026
|
|
0.84285714 0.80821918 0.80821918 0.88059701]
|
|
|
|
mean value: 0.8176325230389928
|
|
|
|
key: train_precision
|
|
value: [0.83098592 0.82581649 0.84688995 0.84419714 0.82453416 0.81316998
|
|
0.83753943 0.83098592 0.80454545 0.8296875 ]
|
|
|
|
mean value: 0.8288351939578175
|
|
|
|
key: test_recall
|
|
value: [1. 1. 0.96610169 1. 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9966101694915255
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.87288136 0.93220339 0.84745763 0.91525424 0.84745763 0.83898305
|
|
0.90677966 0.88135593 0.88135593 0.93220339]
|
|
|
|
mean value: 0.885593220338983
|
|
|
|
key: train_roc_auc
|
|
value: [0.89830508 0.89453861 0.90960452 0.90772128 0.89359699 0.88512241
|
|
0.90301318 0.89830508 0.87853107 0.89736347]
|
|
|
|
mean value: 0.8966101694915254
|
|
|
|
key: test_jcc
|
|
value: [0.7972973 0.88059701 0.76 0.85507246 0.76623377 0.75641026
|
|
0.84285714 0.80821918 0.80821918 0.88059701]
|
|
|
|
mean value: 0.8155503312581709
|
|
|
|
key: train_jcc
|
|
value: [0.83098592 0.82581649 0.84688995 0.84419714 0.82453416 0.81316998
|
|
0.83753943 0.83098592 0.80454545 0.8296875 ]
|
|
|
|
mean value: 0.8288351939578175
|
|
|
|
MCC on Blind test: 0.11
|
|
|
|
Accuracy on Blind test: 0.76
|
|
|
|
Model_name: Ridge Classifier
|
|
Model func: RidgeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02751517 0.02795219 0.05592585 0.04007721 0.08132792 0.04165816
|
|
0.05441833 0.04509616 0.04095101 0.02651 ]
|
|
|
|
mean value: 0.0441431999206543
|
|
|
|
key: score_time
|
|
value: [0.03031325 0.02935934 0.03010917 0.03011847 0.03001142 0.02011967
|
|
0.02701402 0.02952623 0.01896095 0.0126071 ]
|
|
|
|
mean value: 0.025813961029052736
|
|
|
|
key: test_mcc
|
|
value: [0.87246434 0.81461308 0.88148255 0.91538573 0.73560719 0.83483657
|
|
0.86440678 0.80076161 0.83098605 0.9003767 ]
|
|
|
|
mean value: 0.8450920620972893
|
|
|
|
key: train_mcc
|
|
value: [0.89301174 0.90076514 0.89153764 0.88192068 0.90238526 0.90414883
|
|
0.89712369 0.88740859 0.89322736 0.88581541]
|
|
|
|
mean value: 0.8937344340830832
|
|
|
|
key: test_accuracy
|
|
value: [0.93220339 0.90677966 0.94067797 0.95762712 0.86440678 0.91525424
|
|
0.93220339 0.89830508 0.91525424 0.94915254]
|
|
|
|
mean value: 0.9211864406779661
|
|
|
|
key: train_accuracy
|
|
value: [0.94632768 0.95009416 0.94538606 0.94067797 0.95103578 0.9519774
|
|
0.94821092 0.94350282 0.94632768 0.94256121]
|
|
|
|
mean value: 0.9466101694915254
|
|
|
|
key: test_fscore
|
|
value: [0.93650794 0.90909091 0.94017094 0.95798319 0.87301587 0.91935484
|
|
0.93220339 0.90322581 0.91666667 0.95081967]
|
|
|
|
mean value: 0.9239039225852582
|
|
|
|
key: train_fscore
|
|
value: [0.94707521 0.95097132 0.94649446 0.94172063 0.95167286 0.95246971
|
|
0.94921514 0.94434137 0.94727105 0.94367498]
|
|
|
|
mean value: 0.9474906737576759
|
|
|
|
key: test_precision
|
|
value: [0.88059701 0.88709677 0.94827586 0.95 0.82089552 0.87692308
|
|
0.93220339 0.86153846 0.90163934 0.92063492]
|
|
|
|
mean value: 0.8979804366765209
|
|
|
|
key: train_precision
|
|
value: [0.93406593 0.93454545 0.92766727 0.92545455 0.93944954 0.94280443
|
|
0.93115942 0.93053016 0.93090909 0.92572464]
|
|
|
|
mean value: 0.9322310486247966
|
|
|
|
key: test_recall
|
|
value: [1. 0.93220339 0.93220339 0.96610169 0.93220339 0.96610169
|
|
0.93220339 0.94915254 0.93220339 0.98305085]
|
|
|
|
mean value: 0.9525423728813559
|
|
|
|
key: train_recall
|
|
value: [0.96045198 0.96798493 0.96610169 0.95856874 0.96421846 0.96233522
|
|
0.96798493 0.95856874 0.96421846 0.96233522]
|
|
|
|
mean value: 0.963276836158192
|
|
|
|
key: test_roc_auc
|
|
value: [0.93220339 0.90677966 0.94067797 0.95762712 0.86440678 0.91525424
|
|
0.93220339 0.89830508 0.91525424 0.94915254]
|
|
|
|
mean value: 0.9211864406779662
|
|
|
|
key: train_roc_auc
|
|
value: [0.94632768 0.95009416 0.94538606 0.94067797 0.95103578 0.9519774
|
|
0.94821092 0.94350282 0.94632768 0.94256121]
|
|
|
|
mean value: 0.9466101694915254
|
|
|
|
key: test_jcc
|
|
value: [0.88059701 0.83333333 0.88709677 0.91935484 0.77464789 0.85074627
|
|
0.87301587 0.82352941 0.84615385 0.90625 ]
|
|
|
|
mean value: 0.8594725248077018
|
|
|
|
key: train_jcc
|
|
value: [0.8994709 0.90652557 0.89842382 0.88986014 0.90780142 0.90925267
|
|
0.90333919 0.89455185 0.89982425 0.89335664]
|
|
|
|
mean value: 0.9002406451204608
|
|
|
|
MCC on Blind test: 0.73
|
|
|
|
Accuracy on Blind test: 0.93
|
|
|
|
Model_name: Ridge ClassifierCV
|
|
Model func: RidgeClassifierCV(cv=10)
|
|
List of models: /home/tanu/git/LSHTM_analysis/scripts/ml/./embb_cd_8020.py:136: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
smnc_CT.sort_values(by = ['test_mcc'], ascending = False, inplace = True)
|
|
/home/tanu/git/LSHTM_analysis/scripts/ml/./embb_cd_8020.py:139: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
smnc_BT.sort_values(by = ['bts_mcc'], ascending = False, inplace = True)
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifierCV(cv=10))])
|
|
|
|
key: fit_time
|
|
value: [0.46659184 0.59810257 0.43021083 0.51438951 0.46883416 0.36173987
|
|
0.51598859 0.58890104 0.45403695 0.61863995]
|
|
|
|
mean value: 0.5017435312271118
|
|
|
|
key: score_time
|
|
value: [0.02045965 0.02038383 0.01452923 0.01469898 0.01301026 0.04213881
|
|
0.01388741 0.03556108 0.03220654 0.02464271]
|
|
|
|
mean value: 0.023151850700378417
|
|
|
|
key: test_mcc
|
|
value: [0.87246434 0.79844727 0.88148255 0.91643971 0.73560719 0.83483657
|
|
0.86440678 0.80076161 0.83098605 0.9003767 ]
|
|
|
|
mean value: 0.8435808792665732
|
|
|
|
key: train_mcc
|
|
value: [0.89301174 0.90615304 0.89153764 0.89661425 0.90238526 0.90414883
|
|
0.89712369 0.88740859 0.89322736 0.88581541]
|
|
|
|
mean value: 0.895742581156009
|
|
|
|
key: test_accuracy
|
|
value: [0.93220339 0.89830508 0.94067797 0.95762712 0.86440678 0.91525424
|
|
0.93220339 0.89830508 0.91525424 0.94915254]
|
|
|
|
mean value: 0.9203389830508475
|
|
|
|
key: train_accuracy
|
|
value: [0.94632768 0.95291902 0.94538606 0.94821092 0.95103578 0.9519774
|
|
0.94821092 0.94350282 0.94632768 0.94256121]
|
|
|
|
mean value: 0.9476459510357815
|
|
|
|
key: test_fscore
|
|
value: [0.93650794 0.90163934 0.94017094 0.95867769 0.87301587 0.91935484
|
|
0.93220339 0.90322581 0.91666667 0.95081967]
|
|
|
|
mean value: 0.9232282153697071
|
|
|
|
key: train_fscore
|
|
value: [0.94707521 0.9535316 0.94649446 0.94874185 0.95167286 0.95246971
|
|
0.94921514 0.94434137 0.94727105 0.94367498]
|
|
|
|
mean value: 0.9484488229486947
|
|
|
|
key: test_precision
|
|
value: [0.88059701 0.87301587 0.94827586 0.93548387 0.82089552 0.87692308
|
|
0.93220339 0.86153846 0.90163934 0.92063492]
|
|
|
|
mean value: 0.8951207336555276
|
|
|
|
key: train_precision
|
|
value: [0.93406593 0.9412844 0.92766727 0.93911439 0.93944954 0.94280443
|
|
0.93115942 0.93053016 0.93090909 0.92572464]
|
|
|
|
mean value: 0.9342709281061602
|
|
|
|
key: test_recall
|
|
value: [1. 0.93220339 0.93220339 0.98305085 0.93220339 0.96610169
|
|
0.93220339 0.94915254 0.93220339 0.98305085]
|
|
|
|
mean value: 0.9542372881355932
|
|
|
|
key: train_recall
|
|
value: [0.96045198 0.96610169 0.96610169 0.95856874 0.96421846 0.96233522
|
|
0.96798493 0.95856874 0.96421846 0.96233522]
|
|
|
|
mean value: 0.9630885122410546
|
|
|
|
key: test_roc_auc
|
|
value: [0.93220339 0.89830508 0.94067797 0.95762712 0.86440678 0.91525424
|
|
0.93220339 0.89830508 0.91525424 0.94915254]
|
|
|
|
mean value: 0.9203389830508475
|
|
|
|
key: train_roc_auc
|
|
value: [0.94632768 0.95291902 0.94538606 0.94821092 0.95103578 0.9519774
|
|
0.94821092 0.94350282 0.94632768 0.94256121]
|
|
|
|
mean value: 0.9476459510357815
|
|
|
|
key: test_jcc
|
|
value: [0.88059701 0.82089552 0.88709677 0.92063492 0.77464789 0.85074627
|
|
0.87301587 0.82352941 0.84615385 0.90625 ]
|
|
|
|
mean value: 0.8583567519056987
|
|
|
|
key: train_jcc
|
|
value: [0.8994709 0.91119005 0.89842382 0.90248227 0.90780142 0.90925267
|
|
0.90333919 0.89455185 0.89982425 0.89335664]
|
|
|
|
mean value: 0.9019693060941743
|
|
|
|
MCC on Blind test: 0.73
|
|
|
|
Accuracy on Blind test: 0.93
|
|
|
|
Model_name: Logistic Regression
|
|
Model func: LogisticRegression(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegression(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.04569697 0.04695821 0.05145335 0.09305191 0.05779958 0.0474658
|
|
0.08844733 0.08017111 0.08264518 0.062783 ]
|
|
|
|
mean value: 0.06564724445343018
|
|
|
|
key: score_time
|
|
value: [0.01327252 0.01644707 0.01665115 0.02403355 0.0164175 0.01286411
|
|
0.03395438 0.01658607 0.01871514 0.01825571]
|
|
|
|
mean value: 0.0187197208404541
|
|
|
|
key: test_mcc
|
|
value: [0.77977303 0.78247589 0.88148255 0.78067087 0.66139706 0.76668665
|
|
0.7395131 0.79661017 0.8136762 0.84855529]
|
|
|
|
mean value: 0.78508408168114
|
|
|
|
key: train_mcc
|
|
value: [0.83806521 0.83456507 0.8325113 0.81544256 0.84936497 0.83442293
|
|
0.80819055 0.84195723 0.84982927 0.82487192]
|
|
|
|
mean value: 0.8329221016770681
|
|
|
|
key: test_accuracy
|
|
value: [0.88983051 0.88983051 0.94067797 0.88983051 0.83050847 0.88135593
|
|
0.86440678 0.89830508 0.90677966 0.92372881]
|
|
|
|
mean value: 0.8915254237288136
|
|
|
|
key: train_accuracy
|
|
value: [0.91902072 0.91713748 0.91619586 0.90772128 0.92467043 0.91713748
|
|
0.9039548 0.92090395 0.92467043 0.91242938]
|
|
|
|
mean value: 0.9163841807909605
|
|
|
|
key: test_fscore
|
|
value: [0.8907563 0.89430894 0.94117647 0.88695652 0.83333333 0.88709677
|
|
0.85185185 0.89830508 0.90756303 0.92561983]
|
|
|
|
mean value: 0.8916968141983129
|
|
|
|
key: train_fscore
|
|
value: [0.91932458 0.91821561 0.91690009 0.90772128 0.9249531 0.91791045
|
|
0.90267176 0.92164179 0.92592593 0.91267606]
|
|
|
|
mean value: 0.9167940637697299
|
|
|
|
key: test_precision
|
|
value: [0.88333333 0.859375 0.93333333 0.91071429 0.81967213 0.84615385
|
|
0.93877551 0.89830508 0.9 0.90322581]
|
|
|
|
mean value: 0.8892888331083797
|
|
|
|
key: train_precision
|
|
value: [0.91588785 0.90642202 0.90925926 0.90772128 0.92149533 0.90942699
|
|
0.91489362 0.91312384 0.91074681 0.91011236]
|
|
|
|
mean value: 0.9119089356531584
|
|
|
|
key: test_recall
|
|
value: [0.89830508 0.93220339 0.94915254 0.86440678 0.84745763 0.93220339
|
|
0.77966102 0.89830508 0.91525424 0.94915254]
|
|
|
|
mean value: 0.8966101694915254
|
|
|
|
key: train_recall
|
|
value: [0.92278719 0.93032015 0.92467043 0.90772128 0.92843691 0.92655367
|
|
0.89077213 0.93032015 0.94161959 0.91525424]
|
|
|
|
mean value: 0.9218455743879472
|
|
|
|
key: test_roc_auc
|
|
value: [0.88983051 0.88983051 0.94067797 0.88983051 0.83050847 0.88135593
|
|
0.86440678 0.89830508 0.90677966 0.92372881]
|
|
|
|
mean value: 0.8915254237288136
|
|
|
|
key: train_roc_auc
|
|
value: [0.91902072 0.91713748 0.91619586 0.90772128 0.92467043 0.91713748
|
|
0.9039548 0.92090395 0.92467043 0.91242938]
|
|
|
|
mean value: 0.9163841807909605
|
|
|
|
key: test_jcc
|
|
value: [0.8030303 0.80882353 0.88888889 0.796875 0.71428571 0.79710145
|
|
0.74193548 0.81538462 0.83076923 0.86153846]
|
|
|
|
mean value: 0.8058632676455308
|
|
|
|
key: train_jcc
|
|
value: [0.85069444 0.84879725 0.84655172 0.83103448 0.86038394 0.84827586
|
|
0.8226087 0.85467128 0.86206897 0.83937824]
|
|
|
|
mean value: 0.8464464888210846
|
|
|
|
MCC on Blind test: 0.7
|
|
|
|
Accuracy on Blind test: 0.92
|
|
|
|
Model_name: Logistic RegressionCV
|
|
Model func: LogisticRegressionCV(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegressionCV(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [2.01268339 1.77877474 1.95973802 2.26259136 1.77054214 1.85833526
|
|
1.49453092 1.9862752 1.74323559 1.78543139]
|
|
|
|
mean value: 1.8652137994766236
|
|
|
|
key: score_time
|
|
value: [0.01392913 0.02915168 0.01891446 0.01784015 0.01736093 0.01715302
|
|
0.01280761 0.02278137 0.01854086 0.01689649]
|
|
|
|
mean value: 0.018537569046020507
|
|
|
|
key: test_mcc
|
|
value: [0.87246434 0.87246434 0.91855865 0.91643971 0.82319321 0.81934649
|
|
0.91855865 0.81934649 0.9029865 0.85051727]
|
|
|
|
mean value: 0.8713875667274407
|
|
|
|
key: train_mcc
|
|
value: [0.9536651 0.94643559 0.95548024 0.94431365 0.97020544 0.9628886
|
|
0.95004417 0.95535131 0.95004417 0.95004417]
|
|
|
|
mean value: 0.953847245325121
|
|
|
|
key: test_accuracy
|
|
value: [0.93220339 0.93220339 0.95762712 0.95762712 0.90677966 0.90677966
|
|
0.95762712 0.90677966 0.94915254 0.92372881]
|
|
|
|
mean value: 0.9330508474576271
|
|
|
|
key: train_accuracy
|
|
value: [0.97645951 0.97269303 0.97740113 0.97175141 0.98493409 0.98116761
|
|
0.97457627 0.97740113 0.97457627 0.97457627]
|
|
|
|
mean value: 0.9765536723163841
|
|
|
|
key: test_fscore
|
|
value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[0.93650794 0.93650794 0.95934959 0.95867769 0.91338583 0.912
|
|
0.95934959 0.912 0.9516129 0.92682927]
|
|
|
|
mean value: 0.9366220744248299
|
|
|
|
key: train_fscore
|
|
value: [0.97691597 0.97332107 0.97781885 0.97232472 0.98513011 0.98148148
|
|
0.97511521 0.97777778 0.97511521 0.97511521]
|
|
|
|
mean value: 0.9770115611427798
|
|
|
|
key: test_precision
|
|
value: [0.88059701 0.88059701 0.921875 0.93548387 0.85294118 0.86363636
|
|
0.921875 0.86363636 0.90769231 0.890625 ]
|
|
|
|
mean value: 0.8918959112254111
|
|
|
|
key: train_precision
|
|
value: [0.95833333 0.95143885 0.9600726 0.95298373 0.97247706 0.96539162
|
|
0.95487365 0.96174863 0.95487365 0.95487365]
|
|
|
|
mean value: 0.9587066760528578
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 0.98305085 0.98305085 0.96610169
|
|
1. 0.96610169 1. 0.96610169]
|
|
|
|
mean value: 0.9864406779661017
|
|
|
|
key: train_recall
|
|
value: [0.99623352 0.99623352 0.99623352 0.99246704 0.99811676 0.99811676
|
|
0.99623352 0.99435028 0.99623352 0.99623352]
|
|
|
|
mean value: 0.996045197740113
|
|
|
|
key: test_roc_auc
|
|
value: [0.93220339 0.93220339 0.95762712 0.95762712 0.90677966 0.90677966
|
|
0.95762712 0.90677966 0.94915254 0.92372881]
|
|
|
|
mean value: 0.9330508474576271
|
|
|
|
key: train_roc_auc
|
|
value: [0.97645951 0.97269303 0.97740113 0.97175141 0.98493409 0.98116761
|
|
0.97457627 0.97740113 0.97457627 0.97457627]
|
|
|
|
mean value: 0.9765536723163841
|
|
|
|
key: test_jcc
|
|
value: [0.88059701 0.88059701 0.921875 0.92063492 0.84057971 0.83823529
|
|
0.921875 0.83823529 0.90769231 0.86363636]
|
|
|
|
mean value: 0.881395792019456
|
|
|
|
key: train_jcc
|
|
value: [0.95487365 0.94802867 0.95660036 0.94614004 0.97069597 0.96363636
|
|
0.95143885 0.95652174 0.95143885 0.95143885]
|
|
|
|
mean value: 0.9550813337840166
|
|
|
|
MCC on Blind test: 0.72
|
|
|
|
Accuracy on Blind test: 0.93
|
|
|
|
Model_name: Gaussian NB
|
|
Model func: GaussianNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01794887 0.01561499 0.01427388 0.0209291 0.0147872 0.01261377
|
|
0.01367927 0.01953125 0.01602793 0.0179081 ]
|
|
|
|
mean value: 0.01633143424987793
|
|
|
|
key: score_time
|
|
value: [0.01350904 0.01084828 0.01075411 0.01574922 0.0099659 0.00953078
|
|
0.01548767 0.01519847 0.01313114 0.01585126]
|
|
|
|
mean value: 0.013002586364746094
|
|
|
|
key: test_mcc
|
|
value: [0.54879547 0.52816191 0.71278644 0.61450987 0.52816191 0.52816191
|
|
0.46739672 0.6303868 0.55205245 0.57635398]
|
|
|
|
mean value: 0.5686767473288038
|
|
|
|
key: train_mcc
|
|
value: [0.59378253 0.59477378 0.58998839 0.6107166 0.59378253 0.62764522
|
|
0.5967911 0.61169745 0.59650691 0.58449477]
|
|
|
|
mean value: 0.6000179267344538
|
|
|
|
key: test_accuracy
|
|
value: [0.77118644 0.76271186 0.8559322 0.80508475 0.76271186 0.76271186
|
|
0.72881356 0.81355932 0.77118644 0.78813559]
|
|
|
|
mean value: 0.7822033898305085
|
|
|
|
key: train_accuracy
|
|
value: [0.79472693 0.79566855 0.79284369 0.80320151 0.79472693 0.81167608
|
|
0.79661017 0.80414313 0.79190207 0.78907721]
|
|
|
|
mean value: 0.7974576271186441
|
|
|
|
key: test_fscore
|
|
value: [0.75229358 0.75 0.85217391 0.79279279 0.75 0.75
|
|
0.69811321 0.80357143 0.74766355 0.78632479]
|
|
|
|
mean value: 0.7682933257663176
|
|
|
|
key: train_fscore
|
|
value: [0.78156313 0.7840796 0.77955912 0.79079079 0.78156313 0.8
|
|
0.78486056 0.79324056 0.76810073 0.77235772]
|
|
|
|
mean value: 0.7836115336051084
|
|
|
|
key: test_precision
|
|
value: [0.82 0.79245283 0.875 0.84615385 0.79245283 0.79245283
|
|
0.78723404 0.8490566 0.83333333 0.79310345]
|
|
|
|
mean value: 0.8181239764655855
|
|
|
|
key: train_precision
|
|
value: [0.83511777 0.83122363 0.83297645 0.84401709 0.83511777 0.85287846
|
|
0.83298097 0.84 0.86729858 0.8388521 ]
|
|
|
|
mean value: 0.8410462826807681
|
|
|
|
key: test_recall
|
|
value: [0.69491525 0.71186441 0.83050847 0.74576271 0.71186441 0.71186441
|
|
0.62711864 0.76271186 0.6779661 0.77966102]
|
|
|
|
mean value: 0.7254237288135593
|
|
|
|
key: train_recall
|
|
value: [0.73446328 0.74199623 0.73258004 0.74387947 0.73446328 0.75329567
|
|
0.74199623 0.75141243 0.68926554 0.71563089]
|
|
|
|
mean value: 0.7338983050847457
|
|
|
|
key: test_roc_auc
|
|
value: [0.77118644 0.76271186 0.8559322 0.80508475 0.76271186 0.76271186
|
|
0.72881356 0.81355932 0.77118644 0.78813559]
|
|
|
|
mean value: 0.7822033898305085
|
|
|
|
key: train_roc_auc
|
|
value: [0.79472693 0.79566855 0.79284369 0.80320151 0.79472693 0.81167608
|
|
0.79661017 0.80414313 0.79190207 0.78907721]
|
|
|
|
mean value: 0.7974576271186441
|
|
|
|
key: test_jcc
|
|
value: [0.60294118 0.6 0.74242424 0.65671642 0.6 0.6
|
|
0.53623188 0.67164179 0.59701493 0.64788732]
|
|
|
|
mean value: 0.6254857761224821
|
|
|
|
key: train_jcc
|
|
value: [0.64144737 0.64484452 0.63875205 0.65397351 0.64144737 0.66666667
|
|
0.64590164 0.65733114 0.62350937 0.62913907]
|
|
|
|
mean value: 0.6443012701778966
|
|
|
|
MCC on Blind test: 0.56
|
|
|
|
Accuracy on Blind test: 0.85
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01723099 0.02549386 0.02094722 0.02133274 0.02272749 0.01733112
|
|
0.02014685 0.01952744 0.01961136 0.01971793]
|
|
|
|
mean value: 0.020406699180603026
|
|
|
|
key: score_time
|
|
value: [0.01311111 0.0146966 0.0149188 0.01480818 0.01499677 0.01578832
|
|
0.0141871 0.0142355 0.01392817 0.01440144]
|
|
|
|
mean value: 0.014507198333740234
|
|
|
|
key: test_mcc
|
|
value: [0.59458839 0.64416033 0.47544349 0.50854763 0.47519096 0.5020187
|
|
0.4115966 0.53313735 0.54433105 0.53032146]
|
|
|
|
mean value: 0.521933596977322
|
|
|
|
key: train_mcc
|
|
value: [0.55713856 0.54062771 0.53186723 0.56519731 0.5825162 0.5300368
|
|
0.5598027 0.55748724 0.53718441 0.5709496 ]
|
|
|
|
mean value: 0.5532807770147895
|
|
|
|
key: test_accuracy
|
|
value: [0.79661017 0.8220339 0.72881356 0.75423729 0.73728814 0.74576271
|
|
0.70338983 0.76271186 0.77118644 0.76271186]
|
|
|
|
mean value: 0.7584745762711864
|
|
|
|
key: train_accuracy
|
|
value: [0.77777778 0.77024482 0.76553672 0.78248588 0.79096045 0.7645951
|
|
0.77966102 0.7787194 0.76836158 0.78531073]
|
|
|
|
mean value: 0.7763653483992468
|
|
|
|
key: test_fscore
|
|
value: [0.80327869 0.82352941 0.76119403 0.75213675 0.74380165 0.76923077
|
|
0.67889908 0.78125 0.76106195 0.77777778]
|
|
|
|
mean value: 0.7652160111649365
|
|
|
|
key: train_fscore
|
|
value: [0.78584392 0.77281192 0.77176902 0.78551532 0.79558011 0.77106227
|
|
0.78413284 0.78016838 0.77306273 0.78888889]
|
|
|
|
mean value: 0.7808835401860388
|
|
|
|
key: test_precision
|
|
value: [0.77777778 0.81666667 0.68 0.75862069 0.72580645 0.70422535
|
|
0.74 0.72463768 0.7962963 0.73134328]
|
|
|
|
mean value: 0.7455374198863002
|
|
|
|
key: train_precision
|
|
value: [0.75831874 0.76427256 0.75178571 0.77472527 0.77837838 0.75044563
|
|
0.76853526 0.77509294 0.75768535 0.77595628]
|
|
|
|
mean value: 0.7655196134879092
|
|
|
|
key: test_recall
|
|
value: [0.83050847 0.83050847 0.86440678 0.74576271 0.76271186 0.84745763
|
|
0.62711864 0.84745763 0.72881356 0.83050847]
|
|
|
|
mean value: 0.7915254237288135
|
|
|
|
key: train_recall
|
|
value: [0.81544256 0.78154426 0.79284369 0.79661017 0.81355932 0.79284369
|
|
0.80037665 0.78531073 0.78907721 0.80225989]
|
|
|
|
mean value: 0.7969868173258003
|
|
|
|
key: test_roc_auc
|
|
value: [0.79661017 0.8220339 0.72881356 0.75423729 0.73728814 0.74576271
|
|
0.70338983 0.76271186 0.77118644 0.76271186]
|
|
|
|
mean value: 0.7584745762711864
|
|
|
|
key: train_roc_auc
|
|
value: [0.77777778 0.77024482 0.76553672 0.78248588 0.79096045 0.7645951
|
|
0.77966102 0.7787194 0.76836158 0.78531073]
|
|
|
|
mean value: 0.7763653483992468
|
|
|
|
key: test_jcc
|
|
value: [0.67123288 0.7 0.61445783 0.60273973 0.59210526 0.625
|
|
0.51388889 0.64102564 0.61428571 0.63636364]
|
|
|
|
mean value: 0.6211099577786803
|
|
|
|
key: train_jcc
|
|
value: [0.64723468 0.62974203 0.62835821 0.64678899 0.66055046 0.62742176
|
|
0.64491654 0.63957055 0.63007519 0.65137615]
|
|
|
|
mean value: 0.6406034556193134
|
|
|
|
MCC on Blind test: 0.41
|
|
|
|
Accuracy on Blind test: 0.77
|
|
|
|
Model_name: K-Nearest Neighbors
|
|
Model func: KNeighborsClassifier()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', KNeighborsClassifier())])
|
|
|
|
key: fit_time
|
|
value: [0.02968049 0.01939893 0.01988506 0.01444578 0.0134747 0.01292944
|
|
0.0186882 0.01349711 0.01510906 0.01772285]
|
|
|
|
mean value: 0.017483162879943847
|
|
|
|
key: score_time
|
|
value: [0.0452981 0.05153322 0.05499887 0.05851364 0.02815962 0.02332306
|
|
0.02530789 0.02729702 0.02706075 0.0302639 ]
|
|
|
|
mean value: 0.03717560768127441
|
|
|
|
key: test_mcc
|
|
value: [0.77390906 0.78513588 0.82319321 0.81934649 0.65169462 0.74437255
|
|
0.77390906 0.72980045 0.76982148 0.80403577]
|
|
|
|
mean value: 0.7675218567927915
|
|
|
|
key: train_mcc
|
|
value: [0.84710695 0.85223483 0.84295362 0.83131056 0.84669165 0.82866764
|
|
0.83961626 0.84843021 0.84289483 0.84656396]
|
|
|
|
mean value: 0.8426470523770553
|
|
|
|
key: test_accuracy
|
|
value: [0.88135593 0.88135593 0.90677966 0.90677966 0.8220339 0.86440678
|
|
0.88135593 0.8559322 0.88135593 0.89830508]
|
|
|
|
mean value: 0.8779661016949153
|
|
|
|
key: train_accuracy
|
|
value: [0.91996234 0.92184557 0.91713748 0.91054614 0.91996234 0.90960452
|
|
0.91525424 0.91996234 0.9180791 0.9180791 ]
|
|
|
|
mean value: 0.9170433145009417
|
|
|
|
key: test_fscore
|
|
value: [0.890625 0.89393939 0.91338583 0.912 0.83464567 0.87692308
|
|
0.890625 0.87022901 0.88888889 0.9047619 ]
|
|
|
|
mean value: 0.8876023768209844
|
|
|
|
key: train_fscore
|
|
value: [0.92484527 0.92700088 0.92267135 0.91703057 0.92471213 0.91593695
|
|
0.92105263 0.92524186 0.92294066 0.92401747]
|
|
|
|
mean value: 0.9225449776295196
|
|
|
|
key: test_precision
|
|
value: [0.82608696 0.80821918 0.85294118 0.86363636 0.77941176 0.8028169
|
|
0.82608696 0.79166667 0.8358209 0.85074627]
|
|
|
|
mean value: 0.8237433128192726
|
|
|
|
key: train_precision
|
|
value: [0.87166667 0.86963696 0.86490939 0.85504886 0.8729097 0.85597381
|
|
0.86206897 0.8679868 0.87123746 0.86156352]
|
|
|
|
mean value: 0.8653002133466376
|
|
|
|
key: test_recall
|
|
value: [0.96610169 1. 0.98305085 0.96610169 0.89830508 0.96610169
|
|
0.96610169 0.96610169 0.94915254 0.96610169]
|
|
|
|
mean value: 0.9627118644067797
|
|
|
|
key: train_recall
|
|
value: [0.98493409 0.99246704 0.98870056 0.98870056 0.98305085 0.98493409
|
|
0.98870056 0.9905838 0.98116761 0.99623352]
|
|
|
|
mean value: 0.9879472693032015
|
|
|
|
key: test_roc_auc
|
|
value: [0.88135593 0.88135593 0.90677966 0.90677966 0.8220339 0.86440678
|
|
0.88135593 0.8559322 0.88135593 0.89830508]
|
|
|
|
mean value: 0.8779661016949153
|
|
|
|
key: train_roc_auc
|
|
value: [0.91996234 0.92184557 0.91713748 0.91054614 0.91996234 0.90960452
|
|
0.91525424 0.91996234 0.9180791 0.9180791 ]
|
|
|
|
mean value: 0.9170433145009417
|
|
|
|
key: test_jcc
|
|
value: [0.8028169 0.80821918 0.84057971 0.83823529 0.71621622 0.78082192
|
|
0.8028169 0.77027027 0.8 0.82608696]
|
|
|
|
mean value: 0.7986063345978113
|
|
|
|
key: train_jcc
|
|
value: [0.86019737 0.86393443 0.85644372 0.84677419 0.85996705 0.84491115
|
|
0.85365854 0.8608838 0.85690789 0.85876623]
|
|
|
|
mean value: 0.8562444367836273
|
|
|
|
MCC on Blind test: 0.43
|
|
|
|
Accuracy on Blind test: 0.84
|
|
|
|
Model_name: SVM
|
|
Model func: SVC(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SVC(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.09335923 0.07946372 0.09133649 0.09083939 0.07889342 0.08110332
|
|
0.09272361 0.08286762 0.07527542 0.09029579]
|
|
|
|
mean value: 0.08561580181121826
|
|
|
|
key: score_time
|
|
value: [0.02514625 0.02601647 0.02610517 0.0261898 0.02286363 0.0224669
|
|
0.03033996 0.03140903 0.03217936 0.02606082]
|
|
|
|
mean value: 0.026877737045288085
|
|
|
|
key: test_mcc
|
|
value: [0.74672866 0.76315046 0.83242375 0.8136762 0.62856487 0.81461308
|
|
0.69651783 0.77977303 0.79706825 0.81461308]
|
|
|
|
mean value: 0.7687129205211685
|
|
|
|
key: train_mcc
|
|
value: [0.82486022 0.83246405 0.84071369 0.83088575 0.83615967 0.81169911
|
|
0.80982875 0.8305615 0.81357231 0.81003564]
|
|
|
|
mean value: 0.8240780690203461
|
|
|
|
key: test_accuracy
|
|
value: [0.87288136 0.88135593 0.91525424 0.90677966 0.81355932 0.90677966
|
|
0.84745763 0.88983051 0.89830508 0.90677966]
|
|
|
|
mean value: 0.8838983050847458
|
|
|
|
key: train_accuracy
|
|
value: [0.91242938 0.91619586 0.91996234 0.91525424 0.9180791 0.90583804
|
|
0.90489642 0.91525424 0.90677966 0.90489642]
|
|
|
|
mean value: 0.9119585687382298
|
|
|
|
key: test_fscore
|
|
value: [0.87603306 0.88333333 0.91803279 0.90756303 0.81967213 0.90909091
|
|
0.84210526 0.8907563 0.9 0.90909091]
|
|
|
|
mean value: 0.8855677718288165
|
|
|
|
key: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
train_fscore
|
|
value: [0.91251176 0.91674462 0.92165899 0.91651206 0.91815616 0.90619137
|
|
0.90534208 0.91573034 0.90651558 0.90604651]
|
|
|
|
mean value: 0.9125409467312853
|
|
|
|
key: test_precision
|
|
value: [0.85483871 0.86885246 0.88888889 0.9 0.79365079 0.88709677
|
|
0.87272727 0.88333333 0.8852459 0.88709677]
|
|
|
|
mean value: 0.8721730907320542
|
|
|
|
key: train_precision
|
|
value: [0.91165414 0.91078067 0.90252708 0.90310786 0.91729323 0.90280374
|
|
0.9011194 0.91061453 0.90909091 0.89522059]
|
|
|
|
mean value: 0.9064212138207337
|
|
|
|
key: test_recall
|
|
value: [0.89830508 0.89830508 0.94915254 0.91525424 0.84745763 0.93220339
|
|
0.81355932 0.89830508 0.91525424 0.93220339]
|
|
|
|
mean value: 0.9
|
|
|
|
key: train_recall
|
|
value: [0.913371 0.92278719 0.94161959 0.93032015 0.91902072 0.90960452
|
|
0.90960452 0.92090395 0.9039548 0.91713748]
|
|
|
|
mean value: 0.9188323917137476
|
|
|
|
key: test_roc_auc
|
|
value: [0.87288136 0.88135593 0.91525424 0.90677966 0.81355932 0.90677966
|
|
0.84745763 0.88983051 0.89830508 0.90677966]
|
|
|
|
mean value: 0.8838983050847458
|
|
|
|
key: train_roc_auc
|
|
value: [0.91242938 0.91619586 0.91996234 0.91525424 0.9180791 0.90583804
|
|
0.90489642 0.91525424 0.90677966 0.90489642]
|
|
|
|
mean value: 0.9119585687382298
|
|
|
|
key: test_jcc
|
|
value: [0.77941176 0.79104478 0.84848485 0.83076923 0.69444444 0.83333333
|
|
0.72727273 0.8030303 0.81818182 0.83333333]
|
|
|
|
mean value: 0.7959306579675325
|
|
|
|
key: train_jcc
|
|
value: [0.83910035 0.8462867 0.85470085 0.84589041 0.84869565 0.82847341
|
|
0.82705479 0.84455959 0.82901554 0.82823129]
|
|
|
|
mean value: 0.8392008595013721
|
|
|
|
MCC on Blind test: 0.63
|
|
|
|
Accuracy on Blind test: 0.89
|
|
|
|
Model_name: MLP
|
|
Model func: MLPClassifier(max_iter=500, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MLPClassifier(max_iter=500, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [4.98246717 3.10728407 4.65049744 2.86423278 4.47744703 4.2375145
|
|
4.83118272 4.18052816 4.50134373 6.28127408]
|
|
|
|
mean value: 4.411377167701721
|
|
|
|
key: score_time
|
|
value: [0.04345083 0.01275015 0.02020216 0.01275229 0.01558089 0.01545262
|
|
0.01279569 0.01288652 0.0130868 0.02015829]
|
|
|
|
mean value: 0.017911624908447266
|
|
|
|
key: test_mcc
|
|
value: [0.91855865 0.9029865 0.96665725 0.93435318 0.9029865 0.95038193
|
|
0.93435318 0.93435318 0.91855865 0.94928891]
|
|
|
|
mean value: 0.9312477932582808
|
|
|
|
key: train_mcc
|
|
value: [1. 0.98690308 0.99811853 0.99436615 1. 1.
|
|
0.99811853 0.99811853 0.99811853 1. ]
|
|
|
|
mean value: 0.9973743358472725
|
|
|
|
key: test_accuracy
|
|
value: [0.95762712 0.94915254 0.98305085 0.96610169 0.94915254 0.97457627
|
|
0.96610169 0.96610169 0.95762712 0.97457627]
|
|
|
|
mean value: 0.964406779661017
|
|
|
|
key: train_accuracy
|
|
value: [1. 0.99340866 0.99905838 0.99717514 1. 1.
|
|
0.99905838 0.99905838 0.99905838 1. ]
|
|
|
|
mean value: 0.9986817325800377
|
|
|
|
key: test_fscore
|
|
value: [0.95934959 0.9516129 0.98333333 0.96721311 0.9516129 0.97520661
|
|
0.96721311 0.96721311 0.95934959 0.97435897]
|
|
|
|
mean value: 0.9656463256968333
|
|
|
|
key: train_fscore
|
|
value: [1. 0.99345182 0.99905927 0.9971831 1. 1.
|
|
0.99905927 0.99905927 0.99905927 1. ]
|
|
|
|
mean value: 0.9986871987636885
|
|
|
|
key: test_precision
|
|
value: [0.921875 0.90769231 0.96721311 0.93650794 0.90769231 0.9516129
|
|
0.93650794 0.93650794 0.921875 0.98275862]
|
|
|
|
mean value: 0.9370243063577985
|
|
|
|
key: train_precision
|
|
value: [1. 0.98698885 0.9981203 0.99438202 1. 1.
|
|
0.9981203 0.9981203 0.9981203 1. ]
|
|
|
|
mean value: 0.9973852073063072
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 1. 1.
|
|
1. 1. 1. 0.96610169]
|
|
|
|
mean value: 0.9966101694915255
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.95762712 0.94915254 0.98305085 0.96610169 0.94915254 0.97457627
|
|
0.96610169 0.96610169 0.95762712 0.97457627]
|
|
|
|
mean value: 0.964406779661017
|
|
|
|
key: train_roc_auc
|
|
value: [1. 0.99340866 0.99905838 0.99717514 1. 1.
|
|
0.99905838 0.99905838 0.99905838 1. ]
|
|
|
|
mean value: 0.9986817325800377
|
|
|
|
key: test_jcc
|
|
value: [0.921875 0.90769231 0.96721311 0.93650794 0.90769231 0.9516129
|
|
0.93650794 0.93650794 0.921875 0.95 ]
|
|
|
|
mean value: 0.933748444288833
|
|
|
|
key: train_jcc
|
|
value: [1. 0.98698885 0.9981203 0.99438202 1. 1.
|
|
0.9981203 0.9981203 0.9981203 1. ]
|
|
|
|
mean value: 0.9973852073063072
|
|
|
|
MCC on Blind test: 0.62
|
|
|
|
Accuracy on Blind test: 0.91
|
|
|
|
Model_name: Decision Tree
|
|
Model func: DecisionTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', DecisionTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.06977582 0.04433942 0.05822062 0.03766799 0.05319834 0.05120206
|
|
0.05664968 0.0548594 0.04511213 0.041255 ]
|
|
|
|
mean value: 0.05122804641723633
|
|
|
|
key: score_time
|
|
value: [0.01071 0.01587605 0.00943851 0.00949335 0.00974751 0.01035309
|
|
0.00989985 0.00957918 0.00957108 0.00920391]
|
|
|
|
mean value: 0.010387253761291505
|
|
|
|
key: test_mcc
|
|
value: [0.95038193 0.95038193 0.94928891 0.96665725 0.87246434 0.84270097
|
|
0.96665725 0.93435318 0.9029865 0.93220339]
|
|
|
|
mean value: 0.9268075635174898
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.97457627 0.97457627 0.97457627 0.98305085 0.93220339 0.91525424
|
|
0.98305085 0.96610169 0.94915254 0.96610169]
|
|
|
|
mean value: 0.961864406779661
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.97520661 0.97520661 0.97478992 0.98333333 0.93650794 0.921875
|
|
0.98333333 0.96721311 0.9516129 0.96610169]
|
|
|
|
mean value: 0.9635180455176644
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.9516129 0.9516129 0.96666667 0.96721311 0.88059701 0.85507246
|
|
0.96721311 0.93650794 0.90769231 0.96610169]
|
|
|
|
mean value: 0.9350290120435464
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 0.98305085 1. 1. 1.
|
|
1. 1. 1. 0.96610169]
|
|
|
|
mean value: 0.9949152542372881
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.97457627 0.97457627 0.97457627 0.98305085 0.93220339 0.91525424
|
|
0.98305085 0.96610169 0.94915254 0.96610169]
|
|
|
|
mean value: 0.961864406779661
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.9516129 0.9516129 0.95081967 0.96721311 0.88059701 0.85507246
|
|
0.96721311 0.93650794 0.90769231 0.93442623]
|
|
|
|
mean value: 0.9302767660492888
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.59
|
|
|
|
Accuracy on Blind test: 0.91
|
|
|
|
Model_name: Extra Trees
|
|
Model func: ExtraTreesClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreesClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.1645999 0.16440821 0.17281556 0.18405294 0.17806625 0.17669511
|
|
0.2126193 0.20400095 0.20993471 0.22945285]
|
|
|
|
mean value: 0.18966457843780518
|
|
|
|
key: score_time
|
|
value: [0.01873469 0.01904154 0.02024508 0.02046132 0.02098918 0.0263114
|
|
0.02365327 0.02111006 0.0270772 0.02052283]
|
|
|
|
mean value: 0.021814656257629395
|
|
|
|
key: test_mcc
|
|
value: [1. 1. 1. 1. 0.96665725 0.93435318
|
|
1. 1. 1. 0.96665725]
|
|
|
|
mean value: 0.9867667674506322
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [1. 1. 1. 1. 0.98305085 0.96610169
|
|
1. 1. 1. 0.98305085]
|
|
|
|
mean value: 0.9932203389830508
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [1. 1. 1. 1. 0.98333333 0.96721311
|
|
1. 1. 1. 0.98275862]
|
|
|
|
mean value: 0.9933305068777086
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 1. 1. 1. 0.96721311 0.93650794
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9903721051262034
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 1. 1.
|
|
1. 1. 1. 0.96610169]
|
|
|
|
mean value: 0.9966101694915255
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [1. 1. 1. 1. 0.98305085 0.96610169
|
|
1. 1. 1. 0.98305085]
|
|
|
|
mean value: 0.9932203389830508
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [1. 1. 1. 1. 0.96721311 0.93650794
|
|
1. 1. 1. 0.96610169]
|
|
|
|
mean value: 0.9869822746177289
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.44
|
|
|
|
Accuracy on Blind test: 0.89
|
|
|
|
Model_name: Extra Tree
|
|
Model func: ExtraTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01264048 0.01322007 0.01433563 0.01269984 0.01308346 0.01300454
|
|
0.01287937 0.01291203 0.01342368 0.01355386]
|
|
|
|
mean value: 0.013175296783447265
|
|
|
|
key: score_time
|
|
value: [0.00910926 0.01016212 0.01004028 0.00914693 0.00936842 0.00939178
|
|
0.0091629 0.00916362 0.00929356 0.00929928]
|
|
|
|
mean value: 0.009413814544677735
|
|
|
|
key: test_mcc
|
|
value: [0.84270097 0.88762536 0.84270097 0.9029865 0.9029865 0.82807867
|
|
0.91855865 0.9029865 0.87246434 0.87246434]
|
|
|
|
mean value: 0.8773552812296227
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.91525424 0.94067797 0.91525424 0.94915254 0.94915254 0.90677966
|
|
0.95762712 0.94915254 0.93220339 0.93220339]
|
|
|
|
mean value: 0.9347457627118644
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.921875 0.944 0.921875 0.9516129 0.9516129 0.91472868
|
|
0.95934959 0.9516129 0.93650794 0.93650794]
|
|
|
|
mean value: 0.938968285835977
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.85507246 0.89393939 0.85507246 0.90769231 0.90769231 0.84285714
|
|
0.921875 0.90769231 0.88059701 0.88059701]
|
|
|
|
mean value: 0.8853087417260438
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.91525424 0.94067797 0.91525424 0.94915254 0.94915254 0.90677966
|
|
0.95762712 0.94915254 0.93220339 0.93220339]
|
|
|
|
mean value: 0.9347457627118644
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.85507246 0.89393939 0.85507246 0.90769231 0.90769231 0.84285714
|
|
0.921875 0.90769231 0.88059701 0.88059701]
|
|
|
|
mean value: 0.8853087417260438
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.48
|
|
|
|
Accuracy on Blind test: 0.88
|
|
|
|
Model_name: Random Forest
|
|
Model func: RandomForestClassifier(n_estimators=1000, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(n_estimators=1000, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [2.60647511 2.71507335 2.7433157 2.73711514 2.63659644 2.70062232
|
|
2.70707631 2.77463102 2.71180391 2.71721077]
|
|
|
|
mean value: 2.7049920082092287
|
|
|
|
key: score_time
|
|
value: [0.1097703 0.10681558 0.10816455 0.1053071 0.09844947 0.10791278
|
|
0.10668993 0.10981441 0.10711312 0.10534501]
|
|
|
|
mean value: 0.10653822422027588
|
|
|
|
key: test_mcc
|
|
value: [1. 1. 0.98319208 1. 0.93435318 0.93435318
|
|
0.98319208 1. 1. 0.96665725]
|
|
|
|
mean value: 0.9801747774206981
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [1. 1. 0.99152542 1. 0.96610169 0.96610169
|
|
0.99152542 1. 1. 0.98305085]
|
|
|
|
mean value: 0.9898305084745763
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [1. 1. 0.99159664 1. 0.96721311 0.96721311
|
|
0.99159664 1. 1. 0.98275862]
|
|
|
|
mean value: 0.9900378127508775
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 1. 0.98333333 1. 0.93650794 0.93650794
|
|
0.98333333 1. 1. 1. ]
|
|
|
|
mean value: 0.9839682539682539
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 1. 1.
|
|
1. 1. 1. 0.96610169]
|
|
|
|
mean value: 0.9966101694915255
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [1. 1. 0.99152542 1. 0.96610169 0.96610169
|
|
0.99152542 1. 1. 0.98305085]
|
|
|
|
mean value: 0.9898305084745763
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [1. 1. 0.98333333 1. 0.93650794 0.93650794
|
|
0.98333333 1. 1. 0.96610169]
|
|
|
|
mean value: 0.9805784234597794
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.59
|
|
|
|
Accuracy on Blind test: 0.91
|
|
|
|
Model_name: Random Forest2
|
|
Model func: RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000...05', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10,
|
|
oob_score=True, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.15713882 1.16570544 1.14672661 1.13340163 1.20358348 1.17927504
|
|
1.14252329 1.16222072 1.24357724 1.13701653]
|
|
|
|
mean value: 1.1671168804168701
|
|
|
|
key: score_time
|
|
value: [0.23413515 0.22514319 0.27080369 0.24224401 0.16966796 0.28236008
|
|
0.28255677 0.13839722 0.24488616 0.25576329]
|
|
|
|
mean value: 0.23459575176239014
|
|
|
|
key: test_mcc
|
|
value: [0.96665725 0.96665725 0.98319208 0.98319208 0.93435318 0.8824975
|
|
0.96665725 0.98319208 0.98319208 0.94928891]
|
|
|
|
mean value: 0.9598879648970932
|
|
|
|
key: train_mcc
|
|
value: [0.98876369 0.98690308 0.98876369 0.98690308 0.9924952 0.99059961
|
|
0.98876369 0.98690308 0.98690308 0.98876369]
|
|
|
|
mean value: 0.9885761902428309
|
|
|
|
key: test_accuracy
|
|
value: [0.98305085 0.98305085 0.99152542 0.99152542 0.96610169 0.94067797
|
|
0.98305085 0.99152542 0.99152542 0.97457627]
|
|
|
|
mean value: 0.9796610169491525
|
|
|
|
key: train_accuracy
|
|
value: [0.99435028 0.99340866 0.99435028 0.99340866 0.99623352 0.9952919
|
|
0.99435028 0.99340866 0.99340866 0.99435028]
|
|
|
|
mean value: 0.994256120527307
|
|
|
|
key: test_fscore
|
|
value: [0.98333333 0.98333333 0.99159664 0.99159664 0.96721311 0.94214876
|
|
0.98333333 0.99159664 0.99159664 0.97435897]
|
|
|
|
mean value: 0.9800107404065499
|
|
|
|
key: train_fscore
|
|
value: [0.99438202 0.99345182 0.99438202 0.99345182 0.99624765 0.99530516
|
|
0.99438202 0.99345182 0.99345182 0.99438202]
|
|
|
|
mean value: 0.9942888205529952
|
|
|
|
key: test_precision
|
|
value: [0.96721311 0.96721311 0.98333333 0.98333333 0.93650794 0.91935484
|
|
0.96721311 0.98333333 0.98333333 0.98275862]
|
|
|
|
mean value: 0.9673594073502897
|
|
|
|
key: train_precision
|
|
value: [0.98882682 0.98698885 0.98882682 0.98698885 0.99252336 0.99250936
|
|
0.98882682 0.98698885 0.98698885 0.98882682]
|
|
|
|
mean value: 0.9888295380686266
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 1. 0.96610169
|
|
1. 1. 1. 0.96610169]
|
|
|
|
mean value: 0.9932203389830508
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 0.99811676
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9998116760828625
|
|
|
|
key: test_roc_auc
|
|
value: [0.98305085 0.98305085 0.99152542 0.99152542 0.96610169 0.94067797
|
|
0.98305085 0.99152542 0.99152542 0.97457627]
|
|
|
|
mean value: 0.9796610169491525
|
|
|
|
key: train_roc_auc
|
|
value: [0.99435028 0.99340866 0.99435028 0.99340866 0.99623352 0.9952919
|
|
0.99435028 0.99340866 0.99340866 0.99435028]
|
|
|
|
mean value: 0.9942561205273069
|
|
|
|
key: test_jcc
|
|
value: [0.96721311 0.96721311 0.98333333 0.98333333 0.93650794 0.890625
|
|
0.96721311 0.98333333 0.98333333 0.95 ]
|
|
|
|
mean value: 0.9612105614103564
|
|
|
|
key: train_jcc
|
|
value: [0.98882682 0.98698885 0.98882682 0.98698885 0.99252336 0.99065421
|
|
0.98882682 0.98698885 0.98698885 0.98882682]
|
|
|
|
mean value: 0.9886440222997863
|
|
|
|
MCC on Blind test: 0.63
|
|
|
|
Accuracy on Blind test: 0.91
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.02915096 0.01742768 0.01737237 0.01737332 0.01735568 0.01733208
|
|
0.01810622 0.01739311 0.01741457 0.01731491]
|
|
|
|
mean value: 0.018624091148376466
|
|
|
|
key: score_time
|
|
value: [0.01275826 0.01249981 0.01261926 0.01255918 0.01249695 0.01246691
|
|
0.012604 0.01249504 0.01248956 0.01245785]
|
|
|
|
mean value: 0.012544679641723632
|
|
|
|
key: test_mcc
|
|
value: [0.59458839 0.64416033 0.47544349 0.50854763 0.47519096 0.5020187
|
|
0.4115966 0.53313735 0.54433105 0.53032146]
|
|
|
|
mean value: 0.521933596977322
|
|
|
|
key: train_mcc
|
|
value: [0.55713856 0.54062771 0.53186723 0.56519731 0.5825162 0.5300368
|
|
0.5598027 0.55748724 0.53718441 0.5709496 ]
|
|
|
|
mean value: 0.5532807770147895
|
|
|
|
key: test_accuracy
|
|
value: [0.79661017 0.8220339 0.72881356 0.75423729 0.73728814 0.74576271
|
|
0.70338983 0.76271186 0.77118644 0.76271186]
|
|
|
|
mean value: 0.7584745762711864
|
|
|
|
key: train_accuracy
|
|
value: [0.77777778 0.77024482 0.76553672 0.78248588 0.79096045 0.7645951
|
|
0.77966102 0.7787194 0.76836158 0.78531073]
|
|
|
|
mean value: 0.7763653483992468
|
|
|
|
key: test_fscore
|
|
value: [0.80327869 0.82352941 0.76119403 0.75213675 0.74380165 0.76923077
|
|
0.67889908 0.78125 0.76106195 0.77777778]
|
|
|
|
mean value: 0.7652160111649365
|
|
|
|
key: train_fscore
|
|
value: [0.78584392 0.77281192 0.77176902 0.78551532 0.79558011 0.77106227
|
|
0.78413284 0.78016838 0.77306273 0.78888889]
|
|
|
|
mean value: 0.7808835401860388
|
|
|
|
key: test_precision
|
|
value: [0.77777778 0.81666667 0.68 0.75862069 0.72580645 0.70422535
|
|
0.74 0.72463768 0.7962963 0.73134328]
|
|
|
|
mean value: 0.7455374198863002
|
|
|
|
key: train_precision
|
|
value: [0.75831874 0.76427256 0.75178571 0.77472527 0.77837838 0.75044563
|
|
0.76853526 0.77509294 0.75768535 0.77595628]
|
|
|
|
mean value: 0.7655196134879092
|
|
|
|
key: test_recall
|
|
value: [0.83050847 0.83050847 0.86440678 0.74576271 0.76271186 0.84745763
|
|
0.62711864 0.84745763 0.72881356 0.83050847]
|
|
|
|
mean value: 0.7915254237288135
|
|
|
|
key: train_recall
|
|
value: [0.81544256 0.78154426 0.79284369 0.79661017 0.81355932 0.79284369
|
|
0.80037665 0.78531073 0.78907721 0.80225989]
|
|
|
|
mean value: 0.7969868173258003
|
|
|
|
key: test_roc_auc
|
|
value: [0.79661017 0.8220339 0.72881356 0.75423729 0.73728814 0.74576271
|
|
0.70338983 0.76271186 0.77118644 0.76271186]
|
|
|
|
mean value: 0.7584745762711864
|
|
|
|
key: train_roc_auc
|
|
value: [0.77777778 0.77024482 0.76553672 0.78248588 0.79096045 0.7645951
|
|
0.77966102 0.7787194 0.76836158 0.78531073]
|
|
|
|
mean value: 0.7763653483992468
|
|
|
|
key: test_jcc
|
|
value: [0.67123288 0.7 0.61445783 0.60273973 0.59210526 0.625
|
|
0.51388889 0.64102564 0.61428571 0.63636364]
|
|
|
|
mean value: 0.6211099577786803
|
|
|
|
key: train_jcc
|
|
value: [0.64723468 0.62974203 0.62835821 0.64678899 0.66055046 0.62742176
|
|
0.64491654 0.63957055 0.63007519 0.65137615]
|
|
|
|
mean value: 0.6406034556193134
|
|
|
|
MCC on Blind test: 0.41
|
|
|
|
Accuracy on Blind test: 0.77
|
|
|
|
Model_name: XGBoost
|
|
Model func: XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000...
|
|
interaction_constraints=None, learning_rate=None,
|
|
max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan,
|
|
monotone_constraints=None, n_estimators=100,
|
|
n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None,
|
|
reg_lambda=None, scale_pos_weight=None,
|
|
subsample=None, tree_method=None,
|
|
use_label_encoder=False,
|
|
validate_parameters=None, verbosity=0))])
|
|
|
|
key: fit_time
|
|
value: [0.15227723 0.13333368 0.13913131 0.21009135 0.12646198 0.15182567
|
|
0.2724297 0.12285566 0.151649 0.16686201]
|
|
|
|
mean value: 0.16269176006317138
|
|
|
|
key: score_time
|
|
value: [0.01157355 0.01202106 0.01294684 0.01331306 0.01237512 0.01258683
|
|
0.01143575 0.01141262 0.01144981 0.01208377]
|
|
|
|
mean value: 0.01211984157562256
|
|
|
|
key: test_mcc
|
|
value: [0.95038193 0.96665725 0.96665725 0.98319208 0.91855865 0.88762536
|
|
0.96665725 0.95038193 1. 0.98319208]
|
|
|
|
mean value: 0.9573303767194615
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.97457627 0.98305085 0.98305085 0.99152542 0.95762712 0.94067797
|
|
0.98305085 0.97457627 1. 0.99152542]
|
|
|
|
mean value: 0.9779661016949153
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.97520661 0.98333333 0.98333333 0.99159664 0.95934959 0.944
|
|
0.98333333 0.97520661 1. 0.99159664]
|
|
|
|
mean value: 0.9786956093947354
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.9516129 0.96721311 0.96721311 0.98333333 0.921875 0.89393939
|
|
0.96721311 0.9516129 1. 0.98333333]
|
|
|
|
mean value: 0.9587346211319968
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.97457627 0.98305085 0.98305085 0.99152542 0.95762712 0.94067797
|
|
0.98305085 0.97457627 1. 0.99152542]
|
|
|
|
mean value: 0.9779661016949153
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.9516129 0.96721311 0.96721311 0.98333333 0.921875 0.89393939
|
|
0.96721311 0.9516129 1. 0.98333333]
|
|
|
|
mean value: 0.9587346211319968
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.81
|
|
|
|
Accuracy on Blind test: 0.95
|
|
|
|
Model_name: LDA
|
|
Model func: LinearDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LinearDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.05703592 0.10489798 0.08489013 0.0706439 0.07112813 0.10073137
|
|
0.07353306 0.0744009 0.06590748 0.08039069]
|
|
|
|
mean value: 0.07835595607757569
|
|
|
|
key: score_time
|
|
value: [0.0124228 0.02605343 0.01925206 0.01240873 0.01252627 0.01916528
|
|
0.01251388 0.01247072 0.01265025 0.02731133]
|
|
|
|
mean value: 0.016677474975585936
|
|
|
|
key: test_mcc
|
|
value: [0.85051727 0.9029865 0.9003767 0.86891154 0.75106762 0.7395131
|
|
0.83483657 0.72029406 0.85051727 0.80076161]
|
|
|
|
mean value: 0.8219782238276712
|
|
|
|
key: train_mcc
|
|
value: [0.89364635 0.89219718 0.87730071 0.8743829 0.89938387 0.88192068
|
|
0.8882414 0.89219718 0.88465217 0.89349392]
|
|
|
|
mean value: 0.8877416374776488
|
|
|
|
key: test_accuracy
|
|
value: [0.92372881 0.94915254 0.94915254 0.93220339 0.87288136 0.86440678
|
|
0.91525424 0.8559322 0.92372881 0.89830508]
|
|
|
|
mean value: 0.9084745762711864
|
|
|
|
key: train_accuracy
|
|
value: [0.94632768 0.94538606 0.93785311 0.93691149 0.94915254 0.94067797
|
|
0.94350282 0.94538606 0.94161959 0.94632768]
|
|
|
|
mean value: 0.9433145009416196
|
|
|
|
key: test_fscore
|
|
value: [0.92682927 0.9516129 0.95081967 0.93548387 0.88 0.875
|
|
0.91935484 0.86614173 0.92682927 0.90322581]
|
|
|
|
mean value: 0.9135297360354817
|
|
|
|
key: train_fscore
|
|
value: [0.9475621 0.94688645 0.93967093 0.93802035 0.95036765 0.94172063
|
|
0.94495413 0.94688645 0.94322344 0.94746544]
|
|
|
|
mean value: 0.9446757560731506
|
|
|
|
key: test_precision
|
|
value: [0.890625 0.90769231 0.92063492 0.89230769 0.83333333 0.8115942
|
|
0.87692308 0.80882353 0.890625 0.86153846]
|
|
|
|
mean value: 0.8694097524740108
|
|
|
|
key: train_precision
|
|
value: [0.92625899 0.92156863 0.91296625 0.92181818 0.92818671 0.92545455
|
|
0.92128801 0.92156863 0.91800357 0.92779783]
|
|
|
|
mean value: 0.9224911355051559
|
|
|
|
key: test_recall
|
|
value: [0.96610169 1. 0.98305085 0.98305085 0.93220339 0.94915254
|
|
0.96610169 0.93220339 0.96610169 0.94915254]
|
|
|
|
mean value: 0.9627118644067797
|
|
|
|
key: train_recall
|
|
value: [0.96986817 0.97363465 0.96798493 0.95480226 0.97363465 0.95856874
|
|
0.96986817 0.97363465 0.96986817 0.96798493]
|
|
|
|
mean value: 0.967984934086629
|
|
|
|
key: test_roc_auc
|
|
value: [0.92372881 0.94915254 0.94915254 0.93220339 0.87288136 0.86440678
|
|
0.91525424 0.8559322 0.92372881 0.89830508]
|
|
|
|
mean value: 0.9084745762711864
|
|
|
|
key: train_roc_auc
|
|
value: [0.94632768 0.94538606 0.93785311 0.93691149 0.94915254 0.94067797
|
|
0.94350282 0.94538606 0.94161959 0.94632768]
|
|
|
|
mean value: 0.9433145009416196
|
|
|
|
key: test_jcc
|
|
value: [0.86363636 0.90769231 0.90625 0.87878788 0.78571429 0.77777778
|
|
0.85074627 0.76388889 0.86363636 0.82352941]
|
|
|
|
mean value: 0.8421659546555288
|
|
|
|
key: train_jcc
|
|
value: [0.90034965 0.89913043 0.8862069 0.88327526 0.90542907 0.88986014
|
|
0.89565217 0.89913043 0.89254766 0.90017513]
|
|
|
|
mean value: 0.895175685502814
|
|
|
|
MCC on Blind test: 0.61
|
|
|
|
Accuracy on Blind test: 0.9
|
|
|
|
Model_name: Multinomial
|
|
Model func: MultinomialNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MultinomialNB())])
|
|
|
|
key: fit_time
|
|
value: [0.02337646 0.01637435 0.01669407 0.01646566 0.01642299 0.01643491
|
|
0.01650858 0.02654409 0.01654601 0.01633525]
|
|
|
|
mean value: 0.01817023754119873
|
|
|
|
key: score_time
|
|
value: [0.01241827 0.01220965 0.01227546 0.01225877 0.01240802 0.01221752
|
|
0.01229191 0.01250291 0.01234746 0.01232004]
|
|
|
|
mean value: 0.012325000762939454
|
|
|
|
key: test_mcc
|
|
value: [0.49265895 0.55964367 0.62856487 0.56223688 0.44124875 0.5770176
|
|
0.55603844 0.56223688 0.63296433 0.61095981]
|
|
|
|
mean value: 0.5623570166181687
|
|
|
|
key: train_mcc
|
|
value: [0.58950669 0.57277473 0.56453594 0.57920714 0.58374979 0.58261974
|
|
0.57179481 0.57389963 0.5719374 0.56503166]
|
|
|
|
mean value: 0.5755057533210926
|
|
|
|
key: test_accuracy
|
|
value: [0.74576271 0.77966102 0.81355932 0.77966102 0.72033898 0.78813559
|
|
0.77118644 0.77966102 0.81355932 0.80508475]
|
|
|
|
mean value: 0.7796610169491526
|
|
|
|
key: train_accuracy
|
|
value: [0.79378531 0.78531073 0.78154426 0.78907721 0.79096045 0.79096045
|
|
0.78531073 0.78625235 0.78531073 0.78154426]
|
|
|
|
mean value: 0.7870056497175142
|
|
|
|
key: test_fscore
|
|
value: [0.75409836 0.77586207 0.80701754 0.76785714 0.71304348 0.79338843
|
|
0.74285714 0.76785714 0.8 0.8 ]
|
|
|
|
mean value: 0.7721981310065269
|
|
|
|
key: train_fscore
|
|
value: [0.78508342 0.77559055 0.7734375 0.78252427 0.78235294 0.78571429
|
|
0.77821012 0.77853659 0.77777778 0.77210216]
|
|
|
|
mean value: 0.779132960600472
|
|
|
|
key: test_precision
|
|
value: [0.73015873 0.78947368 0.83636364 0.81132075 0.73214286 0.77419355
|
|
0.84782609 0.81132075 0.8627451 0.82142857]
|
|
|
|
mean value: 0.8016973722121118
|
|
|
|
key: train_precision
|
|
value: [0.81967213 0.81237113 0.80324544 0.80761523 0.81595092 0.80594059
|
|
0.80482897 0.80769231 0.80606061 0.80698152]
|
|
|
|
mean value: 0.8090358853142522
|
|
|
|
key: test_recall
|
|
value: [0.77966102 0.76271186 0.77966102 0.72881356 0.69491525 0.81355932
|
|
0.66101695 0.72881356 0.74576271 0.77966102]
|
|
|
|
mean value: 0.747457627118644
|
|
|
|
key: train_recall
|
|
value: [0.75329567 0.74199623 0.74576271 0.75894539 0.75141243 0.76647834
|
|
0.75329567 0.75141243 0.75141243 0.74011299]
|
|
|
|
mean value: 0.751412429378531
|
|
|
|
key: test_roc_auc
|
|
value: [0.74576271 0.77966102 0.81355932 0.77966102 0.72033898 0.78813559
|
|
0.77118644 0.77966102 0.81355932 0.80508475]
|
|
|
|
mean value: 0.7796610169491526
|
|
|
|
key: train_roc_auc
|
|
value: [0.79378531 0.78531073 0.78154426 0.78907721 0.79096045 0.79096045
|
|
0.78531073 0.78625235 0.78531073 0.78154426]
|
|
|
|
mean value: 0.7870056497175142
|
|
|
|
key: test_jcc
|
|
value: [0.60526316 0.63380282 0.67647059 0.62318841 0.55405405 0.65753425
|
|
0.59090909 0.62318841 0.66666667 0.66666667]
|
|
|
|
mean value: 0.6297744099497463
|
|
|
|
key: train_jcc
|
|
value: [0.64620355 0.63344051 0.63057325 0.64274322 0.64251208 0.64705882
|
|
0.63694268 0.63738019 0.63636364 0.6288 ]
|
|
|
|
mean value: 0.6382017942727495
|
|
|
|
MCC on Blind test: 0.52
|
|
|
|
Accuracy on Blind test: 0.83
|
|
|
|
Model_name: Passive Aggresive
|
|
Model func: PassiveAggressiveClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
PassiveAggressiveClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.03120732 0.03653789 0.0377636 0.03825927 0.03968382 0.03102398
|
|
0.03601336 0.04227948 0.03995371 0.03419662]
|
|
|
|
mean value: 0.036691904067993164
|
|
|
|
key: score_time
|
|
value: [0.01259232 0.01235986 0.01245022 0.01227665 0.01231623 0.0122993
|
|
0.01240206 0.01257229 0.01233935 0.01235294]
|
|
|
|
mean value: 0.012396121025085449
|
|
|
|
key: test_mcc
|
|
value: [0.71278644 0.6393294 0.86640023 0.79930525 0.7013929 0.66258916
|
|
0.83098605 0.72923266 0.82807867 0.85348593]
|
|
|
|
mean value: 0.7623586689140804
|
|
|
|
key: train_mcc
|
|
value: [0.78927878 0.66300893 0.85942942 0.74644809 0.86485938 0.77682066
|
|
0.84952247 0.87233583 0.85152744 0.81311711]
|
|
|
|
mean value: 0.8086348114597318
|
|
|
|
key: test_accuracy
|
|
value: [0.8559322 0.79661017 0.93220339 0.88983051 0.84745763 0.80508475
|
|
0.91525424 0.86440678 0.90677966 0.92372881]
|
|
|
|
mean value: 0.873728813559322
|
|
|
|
key: train_accuracy
|
|
value: [0.89453861 0.81073446 0.92937853 0.85781544 0.93126177 0.87853107
|
|
0.92372881 0.93596987 0.92090395 0.9039548 ]
|
|
|
|
mean value: 0.8986817325800377
|
|
|
|
key: test_fscore
|
|
value: [0.85950413 0.75 0.92982456 0.90076336 0.85714286 0.83687943
|
|
0.91666667 0.86666667 0.91472868 0.928 ]
|
|
|
|
mean value: 0.8760176357684386
|
|
|
|
key: train_fscore
|
|
value: [0.89571695 0.77080958 0.92795389 0.87551525 0.93369664 0.89077053
|
|
0.92629663 0.93499044 0.92644483 0.90909091]
|
|
|
|
mean value: 0.8991285654690704
|
|
|
|
key: test_precision
|
|
value: [0.83870968 0.97297297 0.96363636 0.81944444 0.80597015 0.7195122
|
|
0.90163934 0.85245902 0.84285714 0.87878788]
|
|
|
|
mean value: 0.8595989185149577
|
|
|
|
key: train_precision
|
|
value: [0.88581952 0.97687861 0.94705882 0.77859238 0.90175439 0.80923077
|
|
0.89612676 0.94951456 0.86579378 0.86294416]
|
|
|
|
mean value: 0.8873713754781185
|
|
|
|
key: test_recall
|
|
value: [0.88135593 0.61016949 0.89830508 1. 0.91525424 1.
|
|
0.93220339 0.88135593 1. 0.98305085]
|
|
|
|
mean value: 0.9101694915254237
|
|
|
|
key: train_recall
|
|
value: [0.90583804 0.63653484 0.90960452 1. 0.96798493 0.9905838
|
|
0.95856874 0.92090395 0.99623352 0.96045198]
|
|
|
|
mean value: 0.9246704331450094
|
|
|
|
key: test_roc_auc
|
|
value: [0.8559322 0.79661017 0.93220339 0.88983051 0.84745763 0.80508475
|
|
0.91525424 0.86440678 0.90677966 0.92372881]
|
|
|
|
mean value: 0.8737288135593221
|
|
|
|
key: train_roc_auc
|
|
value: [0.89453861 0.81073446 0.92937853 0.85781544 0.93126177 0.87853107
|
|
0.92372881 0.93596987 0.92090395 0.9039548 ]
|
|
|
|
mean value: 0.8986817325800376
|
|
|
|
key: test_jcc
|
|
value: [0.75362319 0.6 0.86885246 0.81944444 0.75 0.7195122
|
|
0.84615385 0.76470588 0.84285714 0.86567164]
|
|
|
|
mean value: 0.7830820800143561
|
|
|
|
key: train_jcc
|
|
value: [0.81112985 0.6270872 0.8655914 0.77859238 0.87563884 0.80305344
|
|
0.86271186 0.87791741 0.862969 0.83333333]
|
|
|
|
mean value: 0.8198024713998739
|
|
|
|
MCC on Blind test: 0.74
|
|
|
|
Accuracy on Blind test: 0.94
|
|
|
|
Model_name: Stochastic GDescent
|
|
Model func: SGDClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SGDClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.03353572 0.04238939 0.03613663 0.04180646 0.03821373 0.03223491
|
|
0.04386687 0.03983498 0.0401926 0.03743339]
|
|
|
|
mean value: 0.03856446743011475
|
|
|
|
key: score_time
|
|
value: [0.01237369 0.01254177 0.01233697 0.01436782 0.01233792 0.01252055
|
|
0.01247478 0.01518011 0.01236892 0.01235843]
|
|
|
|
mean value: 0.01288609504699707
|
|
|
|
key: test_mcc
|
|
value: [0.75106762 0.54470478 0.83825006 0.84270097 0.45272433 0.56079971
|
|
0.8136762 0.6393294 0.66258916 0.8824975 ]
|
|
|
|
mean value: 0.6988339720242075
|
|
|
|
key: train_mcc
|
|
value: [0.84498451 0.48807669 0.84543678 0.81879383 0.50945585 0.64744786
|
|
0.86840075 0.78439925 0.59713617 0.85506488]
|
|
|
|
mean value: 0.7259196572239568
|
|
|
|
key: test_accuracy
|
|
value: [0.87288136 0.72881356 0.91525424 0.91525424 0.69491525 0.77118644
|
|
0.90677966 0.79661017 0.80508475 0.94067797]
|
|
|
|
mean value: 0.8347457627118644
|
|
|
|
key: train_accuracy
|
|
value: [0.92090395 0.69679849 0.91713748 0.90207156 0.70715631 0.80037665
|
|
0.93314501 0.88135593 0.7645951 0.92749529]
|
|
|
|
mean value: 0.8451035781544256
|
|
|
|
key: test_fscore
|
|
value: [0.86486486 0.62790698 0.92063492 0.921875 0.59090909 0.73786408
|
|
0.90598291 0.82857143 0.83687943 0.94214876]
|
|
|
|
mean value: 0.8177637458331992
|
|
|
|
key: train_fscore
|
|
value: [0.91732283 0.56951872 0.92334495 0.91049914 0.58698539 0.75462963
|
|
0.93539581 0.89376054 0.8088685 0.92701422]
|
|
|
|
mean value: 0.8227339733313342
|
|
|
|
key: test_precision
|
|
value: [0.92307692 1. 0.86567164 0.85507246 0.89655172 0.86363636
|
|
0.9137931 0.71604938 0.7195122 0.91935484]
|
|
|
|
mean value: 0.8672718636406332
|
|
|
|
key: train_precision
|
|
value: [0.96082474 0.98156682 0.85899514 0.83835182 0.9954955 0.97897898
|
|
0.90492958 0.80916031 0.68082368 0.93320611]
|
|
|
|
mean value: 0.8942332667788556
|
|
|
|
key: test_recall
|
|
value: [0.81355932 0.45762712 0.98305085 1. 0.44067797 0.6440678
|
|
0.89830508 0.98305085 1. 0.96610169]
|
|
|
|
mean value: 0.8186440677966101
|
|
|
|
key: train_recall
|
|
value: [0.87758945 0.40112994 0.99811676 0.99623352 0.41619586 0.61393597
|
|
0.96798493 0.99811676 0.99623352 0.92090395]
|
|
|
|
mean value: 0.8186440677966101
|
|
|
|
key: test_roc_auc
|
|
value: [0.87288136 0.72881356 0.91525424 0.91525424 0.69491525 0.77118644
|
|
0.90677966 0.79661017 0.80508475 0.94067797]
|
|
|
|
mean value: 0.8347457627118644
|
|
|
|
key: train_roc_auc
|
|
value: [0.92090395 0.69679849 0.91713748 0.90207156 0.70715631 0.80037665
|
|
0.93314501 0.88135593 0.7645951 0.92749529]
|
|
|
|
mean value: 0.8451035781544256
|
|
|
|
key: test_jcc
|
|
value: [0.76190476 0.45762712 0.85294118 0.85507246 0.41935484 0.58461538
|
|
0.828125 0.70731707 0.7195122 0.890625 ]
|
|
|
|
mean value: 0.7077095012405279
|
|
|
|
key: train_jcc
|
|
value: [0.84727273 0.39813084 0.85760518 0.835703 0.41541353 0.60594796
|
|
0.87863248 0.80792683 0.67907574 0.8639576 ]
|
|
|
|
mean value: 0.7189665880392169
|
|
|
|
MCC on Blind test: 0.75
|
|
|
|
Accuracy on Blind test: 0.94
|
|
|
|
Model_name: AdaBoost Classifier
|
|
Model func: AdaBoostClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', AdaBoostClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.32559657 0.28743625 0.30014992 0.28520703 0.27994108 0.28250217
|
|
0.29276633 0.28248096 0.28291345 0.28319669]
|
|
|
|
mean value: 0.29021904468536375
|
|
|
|
key: score_time
|
|
value: [0.01759338 0.01758766 0.01619363 0.0159905 0.01645374 0.01673961
|
|
0.01747441 0.01601696 0.01598048 0.0161128 ]
|
|
|
|
mean value: 0.016614317893981934
|
|
|
|
key: test_mcc
|
|
value: [0.93435318 0.91643971 0.95038193 0.96610169 0.88762536 0.86640023
|
|
0.91855865 0.83483657 0.96665725 0.93435318]
|
|
|
|
mean value: 0.9175707767316577
|
|
|
|
key: train_mcc
|
|
value: [0.96250591 0.96083542 0.94781402 0.9646034 0.96244445 0.97942506
|
|
0.97213937 0.96450755 0.97213937 0.95881361]
|
|
|
|
mean value: 0.9645228154648972
|
|
|
|
key: test_accuracy
|
|
value: [0.96610169 0.95762712 0.97457627 0.98305085 0.94067797 0.93220339
|
|
0.95762712 0.91525424 0.98305085 0.96610169]
|
|
|
|
mean value: 0.9576271186440678
|
|
|
|
key: train_accuracy
|
|
value: [0.98116761 0.98022599 0.97363465 0.98210923 0.98116761 0.98964218
|
|
0.98587571 0.98210923 0.98587571 0.97928437]
|
|
|
|
mean value: 0.9821092278719398
|
|
|
|
key: test_fscore
|
|
value: [0.96721311 0.95867769 0.97520661 0.98305085 0.944 0.93442623
|
|
0.95934959 0.91935484 0.98333333 0.96491228]
|
|
|
|
mean value: 0.9589524535481283
|
|
|
|
key: train_fscore
|
|
value: [0.98134328 0.98050139 0.97407407 0.9823584 0.98130841 0.98972923
|
|
0.98607242 0.98232558 0.98607242 0.97951583]
|
|
|
|
mean value: 0.9823301046493182
|
|
|
|
key: test_precision
|
|
value: [0.93650794 0.93548387 0.9516129 0.98305085 0.89393939 0.9047619
|
|
0.921875 0.87692308 0.96721311 1. ]
|
|
|
|
mean value: 0.9371368048537586
|
|
|
|
key: train_precision
|
|
value: [0.97227357 0.96703297 0.95810565 0.96886447 0.97402597 0.98148148
|
|
0.97252747 0.97058824 0.97252747 0.96869245]
|
|
|
|
mean value: 0.9706119735207276
|
|
|
|
key: test_recall
|
|
value: [1. 0.98305085 1. 0.98305085 1. 0.96610169
|
|
1. 0.96610169 1. 0.93220339]
|
|
|
|
mean value: 0.9830508474576272
|
|
|
|
key: train_recall
|
|
value: [0.9905838 0.99435028 0.9905838 0.99623352 0.98870056 0.99811676
|
|
1. 0.99435028 1. 0.9905838 ]
|
|
|
|
mean value: 0.9943502824858758
|
|
|
|
key: test_roc_auc
|
|
value: [0.96610169 0.95762712 0.97457627 0.98305085 0.94067797 0.93220339
|
|
0.95762712 0.91525424 0.98305085 0.96610169]
|
|
|
|
mean value: 0.9576271186440678
|
|
|
|
key: train_roc_auc
|
|
value: [0.98116761 0.98022599 0.97363465 0.98210923 0.98116761 0.98964218
|
|
0.98587571 0.98210923 0.98587571 0.97928437]
|
|
|
|
mean value: 0.9821092278719398
|
|
|
|
key: test_jcc
|
|
value: [0.93650794 0.92063492 0.9516129 0.96666667 0.89393939 0.87692308
|
|
0.921875 0.85074627 0.96721311 0.93220339]
|
|
|
|
mean value: 0.9218322671139124
|
|
|
|
key: train_jcc
|
|
value: [0.96336996 0.96174863 0.94945848 0.96532847 0.96330275 0.97966728
|
|
0.97252747 0.96526508 0.97252747 0.95985401]
|
|
|
|
mean value: 0.9653049625181127
|
|
|
|
MCC on Blind test: 0.79
|
|
|
|
Accuracy on Blind test: 0.95
|
|
|
|
Model_name: Bagging Classifier
|
|
Model func: BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
BaggingClassifier(n_jobs=10, oob_score=True,
|
|
random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.22442031 0.22552896 0.23321509 0.24687886 0.23034453 0.23689151
|
|
0.23136735 0.23099875 0.24369073 0.24089026]
|
|
|
|
mean value: 0.2344226360321045
|
|
|
|
key: score_time
|
|
value: [0.04521799 0.02917409 0.04406667 0.04181027 0.0361836 0.03925753
|
|
0.04302955 0.02166939 0.02578139 0.03268552]
|
|
|
|
mean value: 0.03588759899139404
|
|
|
|
key: test_mcc
|
|
value: [0.95038193 0.93435318 0.93435318 0.98319208 0.9029865 0.85051727
|
|
0.95038193 0.93435318 0.96665725 0.93220339]
|
|
|
|
mean value: 0.933937989103249
|
|
|
|
key: train_mcc
|
|
value: [0.99811853 0.99811853 0.99436615 1. 1. 0.99811853
|
|
0.99811853 0.99811853 0.99624059 0.99811853]
|
|
|
|
mean value: 0.9979317925280032
|
|
|
|
key: test_accuracy
|
|
value: [0.97457627 0.96610169 0.96610169 0.99152542 0.94915254 0.92372881
|
|
0.97457627 0.96610169 0.98305085 0.96610169]
|
|
|
|
mean value: 0.9661016949152542
|
|
|
|
key: train_accuracy
|
|
value: [0.99905838 0.99905838 0.99717514 1. 1. 0.99905838
|
|
0.99905838 0.99905838 0.99811676 0.99905838]
|
|
|
|
mean value: 0.9989642184557439
|
|
|
|
key: test_fscore
|
|
value: [0.97520661 0.96721311 0.96721311 0.99159664 0.9516129 0.92682927
|
|
0.97520661 0.96721311 0.98333333 0.96610169]
|
|
|
|
mean value: 0.967152640582533
|
|
|
|
key: train_fscore
|
|
value: [0.99905749 0.99905927 0.9971831 1. 1. 0.99905749
|
|
0.99905927 0.99905927 0.9981203 0.99905927]
|
|
|
|
mean value: 0.9989655450116454
|
|
|
|
key: test_precision
|
|
value: [0.9516129 0.93650794 0.93650794 0.98333333 0.90769231 0.890625
|
|
0.9516129 0.93650794 0.96721311 0.96610169]
|
|
|
|
mean value: 0.9427715066670416
|
|
|
|
key: train_precision
|
|
value: [1. 0.9981203 0.99438202 1. 1. 1.
|
|
0.9981203 0.9981203 0.99624765 0.9981203 ]
|
|
|
|
mean value: 0.9983110880263669
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 1. 0.96610169
|
|
1. 1. 1. 0.96610169]
|
|
|
|
mean value: 0.9932203389830508
|
|
|
|
key: train_recall
|
|
value: [0.99811676 1. 1. 1. 1. 0.99811676
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9996233521657251
|
|
|
|
key: test_roc_auc
|
|
value: [0.97457627 0.96610169 0.96610169 0.99152542 0.94915254 0.92372881
|
|
0.97457627 0.96610169 0.98305085 0.96610169]
|
|
|
|
mean value: 0.9661016949152542
|
|
|
|
key: train_roc_auc
|
|
value: [0.99905838 0.99905838 0.99717514 1. 1. 0.99905838
|
|
0.99905838 0.99905838 0.99811676 0.99905838]
|
|
|
|
mean value: 0.9989642184557439
|
|
|
|
key: test_jcc
|
|
value: [0.9516129 0.93650794 0.93650794 0.98333333 0.90769231 0.86363636
|
|
0.9516129 0.93650794 0.96721311 0.93442623]
|
|
|
|
mean value: 0.9369050964899722
|
|
|
|
key: train_jcc
|
|
value: [0.99811676 0.9981203 0.99438202 1. 1. 0.99811676
|
|
0.9981203 0.9981203 0.99624765 0.9981203 ]
|
|
|
|
mean value: 0.997934440192092
|
|
|
|
MCC on Blind test: 0.74
|
|
|
|
Accuracy on Blind test: 0.94
|
|
|
|
Model_name: Gaussian Process
|
|
Model func: GaussianProcessClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianProcessClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.62902117 0.57681036 0.4777236 0.55264854 0.58565617 0.53426766
|
|
0.5144701 0.57419205 0.49696207 0.61380553]
|
|
|
|
mean value: 0.5555557250976563
|
|
|
|
key: score_time
|
|
value: [0.05246711 0.04758549 0.0475347 0.05290985 0.04760146 0.04767299
|
|
0.04758739 0.04728246 0.0514946 0.04792237]
|
|
|
|
mean value: 0.0490058422088623
|
|
|
|
key: test_mcc
|
|
value: [0.9029865 0.80830501 0.9003767 0.93435318 0.85749293 0.87246434
|
|
0.93220339 0.91855865 0.91855865 0.8824975 ]
|
|
|
|
mean value: 0.892779686959712
|
|
|
|
key: train_mcc
|
|
value: [0.98134164 0.97556114 0.98134164 0.98319208 0.98134164 0.97765081
|
|
0.97374517 0.98134164 0.98319208 0.98319208]
|
|
|
|
mean value: 0.9801899937929628
|
|
|
|
key: test_accuracy
|
|
value: [0.94915254 0.89830508 0.94915254 0.96610169 0.92372881 0.93220339
|
|
0.96610169 0.95762712 0.95762712 0.94067797]
|
|
|
|
mean value: 0.9440677966101695
|
|
|
|
key: train_accuracy
|
|
value: [0.9905838 0.98775895 0.9905838 0.99152542 0.9905838 0.98870056
|
|
0.98681733 0.9905838 0.99152542 0.99152542]
|
|
|
|
mean value: 0.9900188323917137
|
|
|
|
key: test_fscore
|
|
value: [0.9516129 0.90625 0.95081967 0.96721311 0.92913386 0.93650794
|
|
0.96610169 0.95934959 0.95934959 0.94214876]
|
|
|
|
mean value: 0.9468487127124408
|
|
|
|
key: train_fscore
|
|
value: [0.99067164 0.98781631 0.99067164 0.99159664 0.99067164 0.98882682
|
|
0.98691589 0.99067164 0.99159664 0.99159664]
|
|
|
|
mean value: 0.9901035494027427
|
|
|
|
key: test_precision
|
|
value: [0.90769231 0.84057971 0.92063492 0.93650794 0.86764706 0.88059701
|
|
0.96610169 0.921875 0.921875 0.91935484]
|
|
|
|
mean value: 0.9082865482353927
|
|
|
|
key: train_precision
|
|
value: [0.98151571 0.98320896 0.98151571 0.98333333 0.98151571 0.97790055
|
|
0.97959184 0.98151571 0.98333333 0.98333333]
|
|
|
|
mean value: 0.9816764191025169
|
|
|
|
key: test_recall
|
|
value: [1. 0.98305085 0.98305085 1. 1. 1.
|
|
0.96610169 1. 1. 0.96610169]
|
|
|
|
mean value: 0.9898305084745763
|
|
|
|
key: train_recall
|
|
value: [1. 0.99246704 1. 1. 1. 1.
|
|
0.99435028 1. 1. 1. ]
|
|
|
|
mean value: 0.9986817325800377
|
|
|
|
key: test_roc_auc
|
|
value: [0.94915254 0.89830508 0.94915254 0.96610169 0.92372881 0.93220339
|
|
0.96610169 0.95762712 0.95762712 0.94067797]
|
|
|
|
mean value: 0.9440677966101695
|
|
|
|
key: train_roc_auc
|
|
value: [0.9905838 0.98775895 0.9905838 0.99152542 0.9905838 0.98870056
|
|
0.98681733 0.9905838 0.99152542 0.99152542]
|
|
|
|
mean value: 0.9900188323917137
|
|
|
|
key: test_jcc
|
|
value: [0.90769231 0.82857143 0.90625 0.93650794 0.86764706 0.88059701
|
|
0.93442623 0.921875 0.921875 0.890625 ]
|
|
|
|
mean value: 0.8996066976028773
|
|
|
|
key: train_jcc
|
|
value: [0.98151571 0.97592593 0.98151571 0.98333333 0.98151571 0.97790055
|
|
0.97416974 0.98151571 0.98333333 0.98333333]
|
|
|
|
mean value: 0.9804059066689937
|
|
|
|
MCC on Blind test: 0.47
|
|
|
|
Accuracy on Blind test: 0.87
|
|
|
|
Model_name: Gradient Boosting
|
|
Model func: GradientBoostingClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GradientBoostingClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.32374096 1.32716203 1.36708903 1.44498086 1.46977258 1.50028253
|
|
1.45721936 1.36414647 1.24235535 1.22067571]
|
|
|
|
mean value: 1.3717424869537354
|
|
|
|
key: score_time
|
|
value: [0.00989652 0.00964761 0.01064444 0.01272869 0.01468801 0.01426196
|
|
0.01052427 0.00988102 0.00979495 0.00985599]
|
|
|
|
mean value: 0.01119234561920166
|
|
|
|
key: test_mcc
|
|
value: [0.95038193 0.96665725 0.95038193 0.98319208 0.88762536 0.85749293
|
|
0.95038193 0.91855865 0.95038193 0.93220339]
|
|
|
|
mean value: 0.934725736552945
|
|
|
|
key: train_mcc
|
|
value: [0.99436615 0.9924952 0.99436615 0.9924952 0.99436615 0.99624059
|
|
0.9924952 0.98690308 0.9924952 0.99436615]
|
|
|
|
mean value: 0.9930589094573364
|
|
|
|
key: test_accuracy
|
|
value: [0.97457627 0.98305085 0.97457627 0.99152542 0.94067797 0.92372881
|
|
0.97457627 0.95762712 0.97457627 0.96610169]
|
|
|
|
mean value: 0.9661016949152542
|
|
|
|
key: train_accuracy
|
|
value: [0.99717514 0.99623352 0.99717514 0.99623352 0.99717514 0.99811676
|
|
0.99623352 0.99340866 0.99623352 0.99717514]
|
|
|
|
mean value: 0.9965160075329567
|
|
|
|
key: test_fscore
|
|
value: [0.97520661 0.98333333 0.97520661 0.99159664 0.944 0.92913386
|
|
0.97520661 0.95934959 0.97520661 0.96610169]
|
|
|
|
mean value: 0.9674341564948693
|
|
|
|
key: train_fscore
|
|
value: [0.9971831 0.99624765 0.9971831 0.99624765 0.9971831 0.9981203
|
|
0.99624765 0.99345182 0.99624765 0.9971831 ]
|
|
|
|
mean value: 0.9965295138389744
|
|
|
|
key: test_precision
|
|
value: [0.9516129 0.96721311 0.9516129 0.98333333 0.89393939 0.86764706
|
|
0.9516129 0.921875 0.9516129 0.96610169]
|
|
|
|
mean value: 0.9406561208668835
|
|
|
|
key: train_precision
|
|
value: [0.99438202 0.99252336 0.99438202 0.99252336 0.99438202 0.99624765
|
|
0.99252336 0.98698885 0.99252336 0.99438202]
|
|
|
|
mean value: 0.9930858050199449
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 1. 1.
|
|
1. 1. 1. 0.96610169]
|
|
|
|
mean value: 0.9966101694915255
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.97457627 0.98305085 0.97457627 0.99152542 0.94067797 0.92372881
|
|
0.97457627 0.95762712 0.97457627 0.96610169]
|
|
|
|
mean value: 0.9661016949152542
|
|
|
|
key: train_roc_auc
|
|
value: [0.99717514 0.99623352 0.99717514 0.99623352 0.99717514 0.99811676
|
|
0.99623352 0.99340866 0.99623352 0.99717514]
|
|
|
|
mean value: 0.9965160075329568
|
|
|
|
key: test_jcc
|
|
value: [0.9516129 0.96721311 0.9516129 0.98333333 0.89393939 0.86764706
|
|
0.9516129 0.921875 0.9516129 0.93442623]
|
|
|
|
mean value: 0.9374885743261778
|
|
|
|
key: train_jcc
|
|
value: [0.99438202 0.99252336 0.99438202 0.99252336 0.99438202 0.99624765
|
|
0.99252336 0.98698885 0.99252336 0.99438202]
|
|
|
|
mean value: 0.9930858050199449
|
|
|
|
MCC on Blind test: 0.72
|
|
|
|
Accuracy on Blind test: 0.94
|
|
|
|
Model_name: QDA
|
|
Model func: QuadraticDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', QuadraticDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.08893824 0.08478069 0.07039428 0.08516455 0.06953406 0.06511164
|
|
0.06004405 0.17379737 0.08299851 0.07840657]
|
|
|
|
mean value: 0.08591699600219727
|
|
|
|
key: score_time
|
|
value: [0.01579905 0.02201867 0.01744866 0.01705718 0.01498961 0.01760507
|
|
0.02272463 0.02153397 0.01648879 0.0221436 ]
|
|
|
|
mean value: 0.01878092288970947
|
|
|
|
key: test_mcc
|
|
value: [1. 1. 0.96610169 0.98319208 1. 1.
|
|
1. 0.96665725 1. 0.96665725]
|
|
|
|
mean value: 0.9882608265369438
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [1. 1. 0.98305085 0.99152542 1. 1.
|
|
1. 0.98305085 1. 0.98305085]
|
|
|
|
mean value: 0.9940677966101695
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [1. 1. 0.98305085 0.99159664 1. 1.
|
|
1. 0.98333333 1. 0.98275862]
|
|
|
|
mean value: 0.9940739440136077
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 1. 0.98305085 0.98333333 1. 1.
|
|
1. 0.96721311 1. 1. ]
|
|
|
|
mean value: 0.9933597295545059
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
|
|
value: [1. 1. 0.98305085 1. 1. 1.
|
|
1. 1. 1. 0.96610169]
|
|
|
|
mean value: 0.9949152542372881
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [1. 1. 0.98305085 0.99152542 1. 1.
|
|
1. 0.98305085 1. 0.98305085]
|
|
|
|
mean value: 0.9940677966101695
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [1. 1. 0.96666667 0.98333333 1. 1.
|
|
1. 0.96721311 1. 0.96610169]
|
|
|
|
mean value: 0.9883314809669352
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.0
|
|
|
|
Accuracy on Blind test: 0.86
|
|
|
|
Model_name: Ridge Classifier
|
|
Model func: RidgeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.0482862 0.03054929 0.06174874 0.03197217 0.03798652 0.03481722
|
|
0.02996325 0.03540969 0.04007578 0.03872442]
|
|
|
|
mean value: 0.038953328132629396
|
|
|
|
key: score_time
|
|
value: [0.03289366 0.03346395 0.02644134 0.03097939 0.03693819 0.01755142
|
|
0.01751828 0.01510334 0.02630544 0.03488922]
|
|
|
|
mean value: 0.027208423614501952
|
|
|
|
key: test_mcc
|
|
value: [0.79661017 0.84855529 0.89882165 0.89830508 0.66254135 0.73049431
|
|
0.79706825 0.74586985 0.8136762 0.88453796]
|
|
|
|
mean value: 0.8076480127148422
|
|
|
|
key: train_mcc
|
|
value: [0.84181388 0.84210065 0.8551741 0.85340401 0.87233583 0.8645049
|
|
0.832405 0.84779596 0.86103127 0.85553844]
|
|
|
|
mean value: 0.8526104031198265
|
|
|
|
key: test_accuracy
|
|
value: [0.89830508 0.92372881 0.94915254 0.94915254 0.83050847 0.86440678
|
|
0.89830508 0.87288136 0.90677966 0.94067797]
|
|
|
|
mean value: 0.9033898305084747
|
|
|
|
key: train_accuracy
|
|
value: [0.92090395 0.92090395 0.92749529 0.92655367 0.93596987 0.93220339
|
|
0.91619586 0.92372881 0.93032015 0.92749529]
|
|
|
|
mean value: 0.9261770244821093
|
|
|
|
key: test_fscore
|
|
value: [0.89830508 0.92561983 0.95 0.94915254 0.83606557 0.86885246
|
|
0.89655172 0.87394958 0.90598291 0.94308943]
|
|
|
|
mean value: 0.9047569135463351
|
|
|
|
key: train_fscore
|
|
value: [0.92105263 0.92193309 0.92823858 0.92750929 0.93692022 0.93271028
|
|
0.91643192 0.92479109 0.93135436 0.92876966]
|
|
|
|
mean value: 0.9269711126064163
|
|
|
|
key: test_precision
|
|
value: [0.89830508 0.90322581 0.93442623 0.94915254 0.80952381 0.84126984
|
|
0.9122807 0.86666667 0.9137931 0.90625 ]
|
|
|
|
mean value: 0.8934893785741433
|
|
|
|
key: train_precision
|
|
value: [0.91932458 0.91009174 0.91881919 0.91559633 0.92321755 0.9257885
|
|
0.91385768 0.91208791 0.91773309 0.91272727]
|
|
|
|
mean value: 0.9169243839236163
|
|
|
|
key: test_recall
|
|
value: [0.89830508 0.94915254 0.96610169 0.94915254 0.86440678 0.89830508
|
|
0.88135593 0.88135593 0.89830508 0.98305085]
|
|
|
|
mean value: 0.9169491525423729
|
|
|
|
key: train_recall
|
|
value: [0.92278719 0.93408663 0.93785311 0.93973635 0.95103578 0.93973635
|
|
0.91902072 0.93785311 0.94538606 0.94538606]
|
|
|
|
mean value: 0.9372881355932203
|
|
|
|
key: test_roc_auc
|
|
value: [0.89830508 0.92372881 0.94915254 0.94915254 0.83050847 0.86440678
|
|
0.89830508 0.87288136 0.90677966 0.94067797]
|
|
|
|
mean value: 0.9033898305084747
|
|
|
|
key: train_roc_auc
|
|
value: [0.92090395 0.92090395 0.92749529 0.92655367 0.93596987 0.93220339
|
|
0.91619586 0.92372881 0.93032015 0.92749529]
|
|
|
|
mean value: 0.9261770244821093
|
|
|
|
key: test_jcc
|
|
value: [0.81538462 0.86153846 0.9047619 0.90322581 0.71830986 0.76811594
|
|
0.8125 0.7761194 0.828125 0.89230769]
|
|
|
|
mean value: 0.8280388684613277
|
|
|
|
key: train_jcc
|
|
value: [0.85365854 0.85517241 0.86608696 0.86481802 0.88132635 0.87390543
|
|
0.8457539 0.86010363 0.87152778 0.86701209]
|
|
|
|
mean value: 0.8639365106776855
|
|
|
|
MCC on Blind test: 0.75
|
|
|
|
Accuracy on Blind test: 0.94
|
|
|
|
Model_name: Ridge ClassifierCV
|
|
Model func: RidgeClassifierCV(cv=10)
|
|
List of models: /home/tanu/git/LSHTM_analysis/scripts/ml/./embb_cd_8020.py:156: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
ros_CT.sort_values(by = ['test_mcc'], ascending = False, inplace = True)
|
|
/home/tanu/git/LSHTM_analysis/scripts/ml/./embb_cd_8020.py:159: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
ros_BT.sort_values(by = ['bts_mcc'], ascending = False, inplace = True)
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifierCV(cv=10))])
|
|
|
|
key: fit_time
|
|
value: [0.63806415 0.48378158 0.38886786 0.55602002 0.50151277 0.62127686
|
|
0.67787075 0.38051081 0.42908239 0.40732956]
|
|
|
|
mean value: 0.5084316730499268
|
|
|
|
key: score_time
|
|
value: [0.0200789 0.01980877 0.02014947 0.0262959 0.03021979 0.02363634
|
|
0.02166891 0.01958346 0.01979709 0.01959968]
|
|
|
|
mean value: 0.02208383083343506
|
|
|
|
key: test_mcc
|
|
value: [0.79661017 0.85051727 0.8824975 0.86891154 0.73261169 0.71692818
|
|
0.77977303 0.7484552 0.83098605 0.83242375]
|
|
|
|
mean value: 0.8039714373537896
|
|
|
|
key: train_mcc
|
|
value: [0.84181388 0.87601074 0.87268907 0.87255888 0.88987569 0.88339584
|
|
0.86270867 0.8882414 0.88595389 0.89349392]
|
|
|
|
mean value: 0.8766741976060377
|
|
|
|
key: test_accuracy
|
|
value: [0.89830508 0.92372881 0.94067797 0.93220339 0.86440678 0.8559322
|
|
0.88983051 0.87288136 0.91525424 0.91525424]
|
|
|
|
mean value: 0.9008474576271186
|
|
|
|
key: train_accuracy
|
|
value: [0.92090395 0.93785311 0.93596987 0.93596987 0.94444444 0.94161959
|
|
0.93126177 0.94350282 0.94256121 0.94632768]
|
|
|
|
mean value: 0.9380414312617702
|
|
|
|
key: test_fscore
|
|
value: [0.89830508 0.92682927 0.94214876 0.93548387 0.87096774 0.864
|
|
0.88888889 0.87804878 0.91666667 0.91803279]
|
|
|
|
mean value: 0.9039371849200857
|
|
|
|
key: train_fscore
|
|
value: [0.92105263 0.93866171 0.93726937 0.93715342 0.94572217 0.94216418
|
|
0.93196645 0.94495413 0.9437788 0.94746544]
|
|
|
|
mean value: 0.939018830140036
|
|
|
|
key: test_precision
|
|
value: [0.89830508 0.890625 0.91935484 0.89230769 0.83076923 0.81818182
|
|
0.89655172 0.84375 0.90163934 0.88888889]
|
|
|
|
mean value: 0.8780373622003297
|
|
|
|
key: train_precision
|
|
value: [0.91932458 0.9266055 0.91862568 0.92014519 0.92446043 0.93345656
|
|
0.92250923 0.92128801 0.92418773 0.92779783]
|
|
|
|
mean value: 0.9238400743677632
|
|
|
|
key: test_recall
|
|
value: [0.89830508 0.96610169 0.96610169 0.98305085 0.91525424 0.91525424
|
|
0.88135593 0.91525424 0.93220339 0.94915254]
|
|
|
|
mean value: 0.9322033898305084
|
|
|
|
key: train_recall
|
|
value: [0.92278719 0.95103578 0.9566855 0.95480226 0.96798493 0.95103578
|
|
0.94161959 0.96986817 0.96421846 0.96798493]
|
|
|
|
mean value: 0.9548022598870056
|
|
|
|
key: test_roc_auc
|
|
value: [0.89830508 0.92372881 0.94067797 0.93220339 0.86440678 0.8559322
|
|
0.88983051 0.87288136 0.91525424 0.91525424]
|
|
|
|
mean value: 0.9008474576271187
|
|
|
|
key: train_roc_auc
|
|
value: [0.92090395 0.93785311 0.93596987 0.93596987 0.94444444 0.94161959
|
|
0.93126177 0.94350282 0.94256121 0.94632768]
|
|
|
|
mean value: 0.9380414312617702
|
|
|
|
key: test_jcc
|
|
value: [0.81538462 0.86363636 0.890625 0.87878788 0.77142857 0.76056338
|
|
0.8 0.7826087 0.84615385 0.84848485]
|
|
|
|
mean value: 0.8257673199809988
|
|
|
|
key: train_jcc
|
|
value: [0.85365854 0.88441331 0.88194444 0.88173913 0.89703316 0.89065256
|
|
0.87260035 0.89565217 0.89354276 0.90017513]
|
|
|
|
mean value: 0.8851411549298365
|
|
|
|
MCC on Blind test: 0.73
|
|
|
|
Accuracy on Blind test: 0.93
|
|
|
|
Model_name: Logistic Regression
|
|
Model func: LogisticRegression(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegression(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.03293633 0.07218051 0.08280826 0.06279945 0.05419064 0.04328632
|
|
0.03202009 0.04391456 0.03194404 0.09151173]
|
|
|
|
mean value: 0.05475919246673584
|
|
|
|
key: score_time
|
|
value: [0.01189208 0.01265621 0.01222682 0.0122056 0.01242328 0.01231432
|
|
0.01176882 0.01457357 0.01198077 0.01227903]
|
|
|
|
mean value: 0.012432050704956055
|
|
|
|
key: test_mcc
|
|
value: [0.73379939 0.52414242 0.68888889 0.57777778 0.47777778 0.47777778
|
|
0.68543653 0.68888889 0.59554321 0.72456884]
|
|
|
|
mean value: 0.6174601491167044
|
|
|
|
key: train_mcc
|
|
value: [0.80368504 0.80237983 0.81526535 0.81594462 0.81502272 0.79389473
|
|
0.80474248 0.81584656 0.79269343 0.80390609]
|
|
|
|
mean value: 0.8063380844783272
|
|
|
|
key: test_accuracy
|
|
value: [0.85 0.75 0.84210526 0.78947368 0.73684211 0.73684211
|
|
0.84210526 0.84210526 0.78947368 0.84210526]
|
|
|
|
mean value: 0.8021052631578947
|
|
|
|
key: train_accuracy
|
|
value: [0.90116279 0.90116279 0.90751445 0.90751445 0.90751445 0.89595376
|
|
0.9017341 0.90751445 0.89595376 0.9017341 ]
|
|
|
|
mean value: 0.902775910740691
|
|
|
|
key: test_fscore
|
|
value: [0.86956522 0.70588235 0.84210526 0.77777778 0.73684211 0.73684211
|
|
0.85714286 0.84210526 0.77777778 0.82352941]
|
|
|
|
mean value: 0.7969570131637704
|
|
|
|
key: train_fscore
|
|
value: [0.89820359 0.9005848 0.90697674 0.90588235 0.90804598 0.89285714
|
|
0.89820359 0.9047619 0.89285714 0.89940828]
|
|
|
|
mean value: 0.9007781529588956
|
|
|
|
key: test_precision
|
|
value: [0.76923077 0.85714286 0.8 0.77777778 0.7 0.7
|
|
0.81818182 0.88888889 0.875 1. ]
|
|
|
|
mean value: 0.8186222111222111
|
|
|
|
key: train_precision
|
|
value: [0.92592593 0.90588235 0.91764706 0.92771084 0.90804598 0.92592593
|
|
0.92592593 0.92682927 0.91463415 0.91566265]
|
|
|
|
mean value: 0.9194190075164028
|
|
|
|
key: test_recall
|
|
value: [1. 0.6 0.88888889 0.77777778 0.77777778 0.77777778
|
|
0.9 0.8 0.7 0.7 ]
|
|
|
|
mean value: 0.7922222222222222
|
|
|
|
key: train_recall
|
|
value: [0.87209302 0.89534884 0.89655172 0.88505747 0.90804598 0.86206897
|
|
0.87209302 0.88372093 0.87209302 0.88372093]
|
|
|
|
mean value: 0.8830793905372895
|
|
|
|
key: test_roc_auc
|
|
value: [0.85 0.75 0.84444444 0.78888889 0.73888889 0.73888889
|
|
0.83888889 0.84444444 0.79444444 0.85 ]
|
|
|
|
mean value: 0.8038888888888889
|
|
|
|
key: train_roc_auc
|
|
value: [0.90116279 0.90116279 0.90757819 0.90764501 0.90751136 0.89615076
|
|
0.90156375 0.90737771 0.89581663 0.90163058]
|
|
|
|
mean value: 0.902759957230687
|
|
|
|
key: test_jcc
|
|
value: [0.76923077 0.54545455 0.72727273 0.63636364 0.58333333 0.58333333
|
|
0.75 0.72727273 0.63636364 0.7 ]
|
|
|
|
mean value: 0.6658624708624709
|
|
|
|
key: train_jcc
|
|
value: [0.81521739 0.81914894 0.82978723 0.82795699 0.83157895 0.80645161
|
|
0.81521739 0.82608696 0.80645161 0.8172043 ]
|
|
|
|
mean value: 0.8195101372840654
|
|
|
|
MCC on Blind test: 0.52
|
|
|
|
Accuracy on Blind test: 0.84
|
|
|
|
Model_name: Logistic RegressionCV
|
|
Model func: LogisticRegressionCV(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegressionCV(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.75364637 1.74696612 1.66758704 1.48830342 1.18106365 1.28296232
|
|
1.21276307 1.27970004 1.13903117 1.09964514]
|
|
|
|
mean value: 1.385166835784912
|
|
|
|
key: score_time
|
|
value: [0.01253629 0.01215029 0.01247764 0.01203561 0.01205754 0.01202536
|
|
0.01558876 0.01198149 0.01194715 0.0119977 ]
|
|
|
|
mean value: 0.012479782104492188
|
|
|
|
key: test_mcc
|
|
value: [0.73379939 0.52414242 0.57777778 0.26666667 0.47777778 0.57777778
|
|
0.4719399 0.68888889 0.59554321 0.72456884]
|
|
|
|
mean value: 0.5638882645831462
|
|
|
|
key: train_mcc
|
|
value: [0.85027548 0.79091158 0.90776635 0.76901484 0.76901484 1.
|
|
1. 0.85018754 0.75840517 0.7695403 ]
|
|
|
|
mean value: 0.8465116112343021
|
|
|
|
key: test_accuracy
|
|
value: [0.85 0.75 0.78947368 0.63157895 0.73684211 0.78947368
|
|
0.73684211 0.84210526 0.78947368 0.84210526]
|
|
|
|
mean value: 0.7757894736842105
|
|
|
|
key: train_accuracy
|
|
value: [0.9244186 0.89534884 0.95375723 0.88439306 0.88439306 1.
|
|
1. 0.92485549 0.87861272 0.88439306]
|
|
|
|
mean value: 0.9230172066137922
|
|
|
|
key: test_fscore
|
|
value: [0.86956522 0.70588235 0.77777778 0.63157895 0.73684211 0.77777778
|
|
0.76190476 0.84210526 0.77777778 0.82352941]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
|
|
mean value: 0.7704741393124755
|
|
|
|
key: train_fscore
|
|
value: [0.92215569 0.89411765 0.95348837 0.88372093 0.88372093 1.
|
|
1. 0.92307692 0.8742515 0.88095238]
|
|
|
|
mean value: 0.921548436927501
|
|
|
|
key: test_precision
|
|
value: [0.76923077 0.85714286 0.77777778 0.6 0.7 0.77777778
|
|
0.72727273 0.88888889 0.875 1. ]
|
|
|
|
mean value: 0.7973090798090798
|
|
|
|
key: train_precision
|
|
value: [0.95061728 0.9047619 0.96470588 0.89411765 0.89411765 1.
|
|
1. 0.93975904 0.90123457 0.90243902]
|
|
|
|
mean value: 0.9351752993619167
|
|
|
|
key: test_recall
|
|
value: [1. 0.6 0.77777778 0.66666667 0.77777778 0.77777778
|
|
0.8 0.8 0.7 0.7 ]
|
|
|
|
mean value: 0.76
|
|
|
|
key: train_recall
|
|
value: [0.89534884 0.88372093 0.94252874 0.87356322 0.87356322 1.
|
|
1. 0.90697674 0.84883721 0.86046512]
|
|
|
|
mean value: 0.9085004009623096
|
|
|
|
key: test_roc_auc
|
|
value: [0.85 0.75 0.78888889 0.63333333 0.73888889 0.78888889
|
|
0.73333333 0.84444444 0.79444444 0.85 ]
|
|
|
|
mean value: 0.7772222222222223
|
|
|
|
key: train_roc_auc
|
|
value: [0.9244186 0.89534884 0.95382251 0.88445603 0.88445603 1.
|
|
1. 0.92475274 0.87844159 0.88425555]
|
|
|
|
mean value: 0.9229951884522855
|
|
|
|
key: test_jcc
|
|
value: [0.76923077 0.54545455 0.63636364 0.46153846 0.58333333 0.63636364
|
|
0.61538462 0.72727273 0.63636364 0.7 ]
|
|
|
|
mean value: 0.6311305361305362
|
|
|
|
key: train_jcc
|
|
value: [0.85555556 0.80851064 0.91111111 0.79166667 0.79166667 1.
|
|
1. 0.85714286 0.77659574 0.78723404]
|
|
|
|
mean value: 0.8579483282674771
|
|
|
|
MCC on Blind test: 0.54
|
|
|
|
Accuracy on Blind test: 0.84
|
|
|
|
Model_name: Gaussian NB
|
|
Model func: GaussianNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01261187 0.01213384 0.00957918 0.00938869 0.00978994 0.00948906
|
|
0.01489401 0.01005197 0.0098052 0.00973368]
|
|
|
|
mean value: 0.010747742652893067
|
|
|
|
key: score_time
|
|
value: [0.01191831 0.01325202 0.0091877 0.00891948 0.00902247 0.01452756
|
|
0.01384878 0.01147175 0.00905252 0.0092392 ]
|
|
|
|
mean value: 0.011043977737426759
|
|
|
|
key: test_mcc
|
|
value: [ 0.50251891 0.50251891 0.68543653 0.26257545 0.4719399 0.48934516
|
|
0.36666667 0.57777778 -0.04494666 0.56694671]
|
|
|
|
mean value: 0.43807793600232514
|
|
|
|
key: train_mcc
|
|
value: [0.58713924 0.5605702 0.54317146 0.58537683 0.55800691 0.525706
|
|
0.57661399 0.59994285 0.6381289 0.63256178]
|
|
|
|
mean value: 0.5807218171370674
|
|
|
|
key: test_accuracy
|
|
value: [0.75 0.75 0.84210526 0.63157895 0.73684211 0.73684211
|
|
0.68421053 0.78947368 0.47368421 0.73684211]
|
|
|
|
mean value: 0.7131578947368421
|
|
|
|
key: train_accuracy
|
|
value: [0.79069767 0.77906977 0.76878613 0.79190751 0.77456647 0.73988439
|
|
0.78612717 0.79768786 0.8150289 0.8150289 ]
|
|
|
|
mean value: 0.7858784782900927
|
|
|
|
key: test_fscore
|
|
value: [0.73684211 0.73684211 0.82352941 0.53333333 0.70588235 0.66666667
|
|
0.7 0.8 0.44444444 0.66666667]
|
|
|
|
mean value: 0.6814207086343309
|
|
|
|
key: train_fscore
|
|
value: [0.775 0.76829268 0.75308642 0.78571429 0.75471698 0.67625899
|
|
0.77018634 0.7826087 0.79746835 0.80487805]
|
|
|
|
mean value: 0.76682107965988
|
|
|
|
key: test_precision
|
|
value: [0.77777778 0.77777778 0.875 0.66666667 0.75 0.83333333
|
|
0.7 0.8 0.5 1. ]
|
|
|
|
mean value: 0.7680555555555556
|
|
|
|
key: train_precision
|
|
value: [0.83783784 0.80769231 0.81333333 0.81481481 0.83333333 0.90384615
|
|
0.82666667 0.84 0.875 0.84615385]
|
|
|
|
mean value: 0.8398678293678293
|
|
|
|
key: test_recall
|
|
value: [0.7 0.7 0.77777778 0.44444444 0.66666667 0.55555556
|
|
0.7 0.8 0.4 0.5 ]
|
|
|
|
mean value: 0.6244444444444445
|
|
|
|
key: train_recall
|
|
value: [0.72093023 0.73255814 0.70114943 0.75862069 0.68965517 0.54022989
|
|
0.72093023 0.73255814 0.73255814 0.76744186]
|
|
|
|
mean value: 0.7096631916599839
|
|
|
|
key: test_roc_auc
|
|
value: [0.75 0.75 0.83888889 0.62222222 0.73333333 0.72777778
|
|
0.68333333 0.78888889 0.47777778 0.75 ]
|
|
|
|
mean value: 0.7122222222222222
|
|
|
|
key: train_roc_auc
|
|
value: [0.79069767 0.77906977 0.76917936 0.79210104 0.77506014 0.74104518
|
|
0.78575247 0.79731355 0.81455493 0.81475541]
|
|
|
|
mean value: 0.7859529537556803
|
|
|
|
key: test_jcc
|
|
value: [0.58333333 0.58333333 0.7 0.36363636 0.54545455 0.5
|
|
0.53846154 0.66666667 0.28571429 0.5 ]
|
|
|
|
mean value: 0.5266600066600067
|
|
|
|
key: train_jcc
|
|
value: [0.63265306 0.62376238 0.6039604 0.64705882 0.60606061 0.51086957
|
|
0.62626263 0.64285714 0.66315789 0.67346939]
|
|
|
|
mean value: 0.623011187992084
|
|
|
|
MCC on Blind test: 0.55
|
|
|
|
Accuracy on Blind test: 0.86
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01115942 0.0152576 0.00953531 0.00891876 0.00897241 0.00979829
|
|
0.01382136 0.01307917 0.00985909 0.00930858]
|
|
|
|
mean value: 0.010970997810363769
|
|
|
|
key: score_time
|
|
value: [0.0141232 0.01306486 0.00925088 0.00852442 0.00856829 0.0086391
|
|
0.01425314 0.01109648 0.00987673 0.00942612]
|
|
|
|
mean value: 0.010682320594787598
|
|
|
|
key: test_mcc
|
|
value: [ 0.40824829 0.52414242 0.4719399 0.1495142 0.25844328 -0.19096397
|
|
0.47777778 0.68543653 0.19096397 0.64450339]
|
|
|
|
mean value: 0.36200057889350745
|
|
|
|
key: train_mcc
|
|
value: [0.61153541 0.62944075 0.57917855 0.6670714 0.56139665 0.58453737
|
|
0.64660058 0.63256178 0.621633 0.63405536]
|
|
|
|
mean value: 0.6168010848431563
|
|
|
|
key: test_accuracy
|
|
value: [0.7 0.75 0.73684211 0.57894737 0.63157895 0.42105263
|
|
0.73684211 0.84210526 0.57894737 0.78947368]
|
|
|
|
mean value: 0.6765789473684211
|
|
|
|
key: train_accuracy
|
|
value: [0.79651163 0.81395349 0.78612717 0.83236994 0.78034682 0.79190751
|
|
0.82080925 0.8150289 0.80924855 0.8150289 ]
|
|
|
|
mean value: 0.8061332168302191
|
|
|
|
key: test_fscore
|
|
value: [0.72727273 0.70588235 0.70588235 0.5 0.58823529 0.26666667
|
|
0.73684211 0.85714286 0.5 0.75 ]
|
|
|
|
mean value: 0.6337924356345409
|
|
|
|
key: train_fscore
|
|
value: [0.76821192 0.80722892 0.77018634 0.82634731 0.77647059 0.78823529
|
|
0.80745342 0.80487805 0.79754601 0.80246914]
|
|
|
|
mean value: 0.7949026972340305
|
|
|
|
key: test_precision
|
|
value: [0.66666667 0.85714286 0.75 0.57142857 0.625 0.33333333
|
|
0.77777778 0.81818182 0.66666667 1. ]
|
|
|
|
mean value: 0.7066197691197691
|
|
|
|
key: train_precision
|
|
value: [0.89230769 0.8375 0.83783784 0.8625 0.79518072 0.80722892
|
|
0.86666667 0.84615385 0.84415584 0.85526316]
|
|
|
|
mean value: 0.8444794683570841
|
|
|
|
key: test_recall
|
|
value: [0.8 0.6 0.66666667 0.44444444 0.55555556 0.22222222
|
|
0.7 0.9 0.4 0.6 ]
|
|
|
|
mean value: 0.5888888888888889
|
|
|
|
key: train_recall
|
|
value: [0.6744186 0.77906977 0.71264368 0.79310345 0.75862069 0.77011494
|
|
0.75581395 0.76744186 0.75581395 0.75581395]
|
|
|
|
mean value: 0.7522854851643945
|
|
|
|
key: test_roc_auc
|
|
value: [0.7 0.75 0.73333333 0.57222222 0.62777778 0.41111111
|
|
0.73888889 0.83888889 0.58888889 0.8 ]
|
|
|
|
mean value: 0.6761111111111111
|
|
|
|
key: train_roc_auc
|
|
value: [0.79651163 0.81395349 0.7865544 0.83259824 0.78047314 0.79203422
|
|
0.82043571 0.81475541 0.80894146 0.81468859]
|
|
|
|
mean value: 0.8060946271050521
|
|
|
|
key: test_jcc
|
|
value: [0.57142857 0.54545455 0.54545455 0.33333333 0.41666667 0.15384615
|
|
0.58333333 0.75 0.33333333 0.6 ]
|
|
|
|
mean value: 0.48328504828504826
|
|
|
|
key: train_jcc
|
|
value: [0.62365591 0.67676768 0.62626263 0.70408163 0.63461538 0.65048544
|
|
0.67708333 0.67346939 0.66326531 0.67010309]
|
|
|
|
mean value: 0.6599789791164837
|
|
|
|
MCC on Blind test: 0.39
|
|
|
|
Accuracy on Blind test: 0.76
|
|
|
|
Model_name: K-Nearest Neighbors
|
|
Model func: KNeighborsClassifier()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', KNeighborsClassifier())])
|
|
|
|
key: fit_time
|
|
value: [0.01465583 0.01319671 0.0087111 0.00918531 0.01440859 0.0102222
|
|
0.00871563 0.008986 0.01243424 0.00935054]
|
|
|
|
mean value: 0.010986614227294921
|
|
|
|
key: score_time
|
|
value: [0.02262044 0.01177096 0.01510739 0.01441455 0.0308919 0.01498818
|
|
0.01527858 0.01396251 0.01621389 0.01483917]
|
|
|
|
mean value: 0.017008757591247557
|
|
|
|
key: test_mcc
|
|
value: [ 0.10050378 0.10482848 0.58655573 -0.15555556 0.47777778 0.25844328
|
|
0.06900656 0.32756921 0.36666667 0.36666667]
|
|
|
|
mean value: 0.2502462592681145
|
|
|
|
key: train_mcc
|
|
value: [0.51385602 0.53721311 0.49131248 0.51587087 0.47981823 0.50287395
|
|
0.50354996 0.48038131 0.4917028 0.55071491]
|
|
|
|
mean value: 0.5067293647376769
|
|
|
|
key: test_accuracy
|
|
value: [0.55 0.55 0.78947368 0.42105263 0.73684211 0.63157895
|
|
0.52631579 0.63157895 0.68421053 0.68421053]
|
|
|
|
mean value: 0.6205263157894737
|
|
|
|
key: train_accuracy
|
|
value: [0.75581395 0.76744186 0.74566474 0.75722543 0.73988439 0.75144509
|
|
0.75144509 0.73988439 0.74566474 0.77456647]
|
|
|
|
mean value: 0.7529036160774297
|
|
|
|
key: test_fscore
|
|
value: [0.57142857 0.47058824 0.75 0.42105263 0.73684211 0.58823529
|
|
0.47058824 0.53333333 0.7 0.7 ]
|
|
|
|
mean value: 0.5942068406309892
|
|
|
|
key: train_fscore
|
|
value: [0.74390244 0.75609756 0.74712644 0.75 0.73988439 0.75428571
|
|
0.74251497 0.73053892 0.73809524 0.76363636]
|
|
|
|
mean value: 0.7466082038078078
|
|
|
|
key: test_precision
|
|
value: [0.54545455 0.57142857 0.85714286 0.4 0.7 0.625
|
|
0.57142857 0.8 0.7 0.7 ]
|
|
|
|
mean value: 0.6470454545454545
|
|
|
|
key: train_precision
|
|
value: [0.78205128 0.79487179 0.74712644 0.77777778 0.74418605 0.75
|
|
0.7654321 0.75308642 0.75609756 0.79746835]
|
|
|
|
mean value: 0.7668097771918599
|
|
|
|
key: test_recall
|
|
value: [0.6 0.4 0.66666667 0.44444444 0.77777778 0.55555556
|
|
0.4 0.4 0.7 0.7 ]
|
|
|
|
mean value: 0.5644444444444444
|
|
|
|
key: train_recall
|
|
value: [0.70930233 0.72093023 0.74712644 0.72413793 0.73563218 0.75862069
|
|
0.72093023 0.70930233 0.72093023 0.73255814]
|
|
|
|
mean value: 0.7279470729751403
|
|
|
|
key: test_roc_auc
|
|
value: [0.55 0.55 0.78333333 0.42222222 0.73888889 0.62777778
|
|
0.53333333 0.64444444 0.68333333 0.68333333]
|
|
|
|
mean value: 0.6216666666666667
|
|
|
|
key: train_roc_auc
|
|
value: [0.75581395 0.76744186 0.74565624 0.7574178 0.73990912 0.75140337
|
|
0.75126971 0.73970863 0.74552259 0.77432505]
|
|
|
|
mean value: 0.7528468323977546
|
|
|
|
key: test_jcc
|
|
value: [0.4 0.30769231 0.6 0.26666667 0.58333333 0.41666667
|
|
0.30769231 0.36363636 0.53846154 0.53846154]
|
|
|
|
mean value: 0.43226107226107224
|
|
|
|
key: train_jcc
|
|
value: [0.59223301 0.60784314 0.59633028 0.6 0.58715596 0.60550459
|
|
0.59047619 0.5754717 0.58490566 0.61764706]
|
|
|
|
mean value: 0.5957567580441999
|
|
|
|
MCC on Blind test: 0.18
|
|
|
|
Accuracy on Blind test: 0.66
|
|
|
|
Model_name: SVM
|
|
Model func: SVC(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SVC(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.0218339 0.01240969 0.0110023 0.01060295 0.0106616 0.01891136
|
|
0.01211572 0.0112946 0.01123428 0.01829505]
|
|
|
|
mean value: 0.013836145401000977
|
|
|
|
key: score_time
|
|
value: [0.0158236 0.00961041 0.00917125 0.00896597 0.00890923 0.01516652
|
|
0.0097959 0.00949717 0.00934672 0.01622367]
|
|
|
|
mean value: 0.011251044273376466
|
|
|
|
key: test_mcc
|
|
value: [0.40824829 0.34641016 0.68888889 0.36666667 0.38204659 0.25844328
|
|
0.58655573 0.80903983 0.38204659 0.56694671]
|
|
|
|
mean value: 0.4795292735513177
|
|
|
|
key: train_mcc
|
|
value: [0.72269123 0.79262909 0.74755742 0.78171912 0.75856599 0.80796002
|
|
0.77381952 0.75959752 0.79520134 0.7828067 ]
|
|
|
|
mean value: 0.7722547953829283
|
|
|
|
key: test_accuracy
|
|
value: [0.7 0.65 0.84210526 0.68421053 0.68421053 0.63157895
|
|
0.78947368 0.89473684 0.68421053 0.73684211]
|
|
|
|
mean value: 0.7297368421052631
|
|
|
|
key: train_accuracy
|
|
value: [0.86046512 0.89534884 0.87283237 0.89017341 0.87861272 0.9017341
|
|
0.88439306 0.87861272 0.89595376 0.89017341]
|
|
|
|
mean value: 0.884829950262132
|
|
|
|
key: test_fscore
|
|
value: [0.72727273 0.53333333 0.84210526 0.66666667 0.7 0.58823529
|
|
0.81818182 0.88888889 0.66666667 0.66666667]
|
|
|
|
mean value: 0.7098017324952309
|
|
|
|
key: train_fscore
|
|
value: [0.85542169 0.89156627 0.86904762 0.88757396 0.87573964 0.8969697
|
|
0.87654321 0.87272727 0.8902439 0.88484848]
|
|
|
|
mean value: 0.8800681747183325
|
|
|
|
key: test_precision
|
|
value: [0.66666667 0.8 0.8 0.66666667 0.63636364 0.625
|
|
0.75 1. 0.75 1. ]
|
|
|
|
mean value: 0.769469696969697
|
|
|
|
key: train_precision
|
|
value: [0.8875 0.925 0.90123457 0.91463415 0.90243902 0.94871795
|
|
0.93421053 0.91139241 0.93589744 0.92405063]
|
|
|
|
mean value: 0.91850766875388
|
|
|
|
key: test_recall
|
|
value: [0.8 0.4 0.88888889 0.66666667 0.77777778 0.55555556
|
|
0.9 0.8 0.6 0.5 ]
|
|
|
|
mean value: 0.6888888888888889
|
|
|
|
key: train_recall
|
|
value: [0.8255814 0.86046512 0.83908046 0.86206897 0.85057471 0.85057471
|
|
0.8255814 0.8372093 0.84883721 0.84883721]
|
|
|
|
mean value: 0.844881047848169
|
|
|
|
key: test_roc_auc
|
|
value: [0.7 0.65 0.84444444 0.68333333 0.68888889 0.62777778
|
|
0.78333333 0.9 0.68888889 0.75 ]
|
|
|
|
mean value: 0.7316666666666667
|
|
|
|
key: train_roc_auc
|
|
value: [0.86046512 0.89534884 0.8730286 0.89033681 0.87877573 0.90203154
|
|
0.88405507 0.87837477 0.89568297 0.88993585]
|
|
|
|
mean value: 0.884803528468324
|
|
|
|
key: test_jcc
|
|
value: [0.57142857 0.36363636 0.72727273 0.5 0.53846154 0.41666667
|
|
0.69230769 0.8 0.5 0.5 ]
|
|
|
|
mean value: 0.560977355977356
|
|
|
|
key: train_jcc
|
|
value: [0.74736842 0.80434783 0.76842105 0.79787234 0.77894737 0.81318681
|
|
0.78021978 0.77419355 0.8021978 0.79347826]
|
|
|
|
mean value: 0.7860233213478809
|
|
|
|
MCC on Blind test: 0.57
|
|
|
|
Accuracy on Blind test: 0.86
|
|
|
|
Model_name: MLP
|
|
Model func: MLPClassifier(max_iter=500, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MLPClassifier(max_iter=500, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.76739192 0.71427822 0.70069671 1.12247586 0.97163486 0.84489369
|
|
0.94266272 0.79755092 0.79987383 0.79371977]
|
|
|
|
mean value: 0.8455178499221802
|
|
|
|
key: score_time
|
|
value: [0.01221752 0.01221156 0.01485491 0.0153749 0.01491261 0.01489758
|
|
0.01500487 0.01225162 0.02208447 0.01244259]
|
|
|
|
mean value: 0.014625263214111329
|
|
|
|
key: test_mcc
|
|
value: [0.40824829 0.52414242 0.59554321 0.4719399 0.38204659 0.57777778
|
|
0.25844328 0.57777778 0.26666667 0.64450339]
|
|
|
|
mean value: 0.4707089302589414
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.7 0.75 0.78947368 0.73684211 0.68421053 0.78947368
|
|
0.63157895 0.78947368 0.63157895 0.78947368]
|
|
|
|
mean value: 0.7292105263157894
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.72727273 0.70588235 0.8 0.70588235 0.7 0.77777778
|
|
0.66666667 0.8 0.63157895 0.75 ]
|
|
|
|
mean value: 0.7265060824967946
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
[0.66666667 0.85714286 0.72727273 0.75 0.63636364 0.77777778
|
|
0.63636364 0.8 0.66666667 1. ]
|
|
|
|
mean value: 0.7518253968253968
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.8 0.6 0.88888889 0.66666667 0.77777778 0.77777778
|
|
0.7 0.8 0.6 0.6 ]
|
|
|
|
mean value: 0.7211111111111111
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.7 0.75 0.79444444 0.73333333 0.68888889 0.78888889
|
|
0.62777778 0.78888889 0.63333333 0.8 ]
|
|
|
|
mean value: 0.7305555555555556
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.57142857 0.54545455 0.66666667 0.54545455 0.53846154 0.63636364
|
|
0.5 0.66666667 0.46153846 0.6 ]
|
|
|
|
mean value: 0.5732034632034632
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.49
|
|
|
|
Accuracy on Blind test: 0.81
|
|
|
|
Model_name: Decision Tree
|
|
Model func: DecisionTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', DecisionTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01919198 0.01430988 0.01597047 0.01398993 0.01464057 0.01433396
|
|
0.01433349 0.01382279 0.01514721 0.01489377]
|
|
|
|
mean value: 0.01506340503692627
|
|
|
|
key: score_time
|
|
value: [0.01156759 0.00891519 0.00891733 0.00860238 0.00855613 0.00855994
|
|
0.00855803 0.00853229 0.0084517 0.00850177]
|
|
|
|
mean value: 0.008916234970092774
|
|
|
|
key: test_mcc
|
|
value: [0.40824829 0.61237244 0.68888889 0.80903983 0.80507649 0.78888889
|
|
0.68543653 0.58655573 0.4719399 0.72456884]
|
|
|
|
mean value: 0.6581015818105111
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.7 0.8 0.84210526 0.89473684 0.89473684 0.89473684
|
|
0.84210526 0.78947368 0.73684211 0.84210526]
|
|
|
|
mean value: 0.8236842105263158
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.66666667 0.77777778 0.84210526 0.9 0.875 0.88888889
|
|
0.85714286 0.81818182 0.76190476 0.82352941]
|
|
|
|
mean value: 0.8211197445485371
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.75 0.875 0.8 0.81818182 1. 0.88888889
|
|
0.81818182 0.75 0.72727273 1. ]
|
|
|
|
mean value: 0.8427525252525253
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.6 0.7 0.88888889 1. 0.77777778 0.88888889
|
|
0.9 0.9 0.8 0.7 ]
|
|
|
|
mean value: 0.8155555555555556
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.7 0.8 0.84444444 0.9 0.88888889 0.89444444
|
|
0.83888889 0.78333333 0.73333333 0.85 ]
|
|
|
|
mean value: 0.8233333333333334
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.5 0.63636364 0.72727273 0.81818182 0.77777778 0.8
|
|
0.75 0.69230769 0.61538462 0.7 ]
|
|
|
|
mean value: 0.7017288267288267
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.6
|
|
|
|
Accuracy on Blind test: 0.89
|
|
|
|
Model_name: Extra Trees
|
|
Model func: ExtraTreesClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreesClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.09142828 0.09060216 0.09192824 0.09156013 0.09199476 0.09211755
|
|
0.09502268 0.09244156 0.09444451 0.09173989]
|
|
|
|
mean value: 0.09232797622680664
|
|
|
|
key: score_time
|
|
value: [0.01697731 0.01694632 0.01721001 0.01764488 0.01715279 0.0171926
|
|
0.01730871 0.01722693 0.01717067 0.01710653]
|
|
|
|
mean value: 0.01719367504119873
|
|
|
|
key: test_mcc
|
|
value: [0.40824829 0.43643578 0.59554321 0.26666667 0.50604808 0.47777778
|
|
0.58655573 0.47777778 0.47777778 0.68888889]
|
|
|
|
mean value: 0.4921719973959328
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.7 0.7 0.78947368 0.63157895 0.73684211 0.73684211
|
|
0.78947368 0.73684211 0.73684211 0.84210526]
|
|
|
|
mean value: 0.74
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.72727273 0.625 0.8 0.63157895 0.76190476 0.73684211
|
|
0.81818182 0.73684211 0.73684211 0.84210526]
|
|
|
|
mean value: 0.7416569833675096
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.66666667 0.83333333 0.72727273 0.6 0.66666667 0.7
|
|
0.75 0.77777778 0.77777778 0.88888889]
|
|
|
|
mean value: 0.7388383838383838
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.8 0.5 0.88888889 0.66666667 0.88888889 0.77777778
|
|
0.9 0.7 0.7 0.8 ]
|
|
|
|
mean value: 0.7622222222222222
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.7 0.7 0.79444444 0.63333333 0.74444444 0.73888889
|
|
0.78333333 0.73888889 0.73888889 0.84444444]
|
|
|
|
mean value: 0.7416666666666667
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.57142857 0.45454545 0.66666667 0.46153846 0.61538462 0.58333333
|
|
0.69230769 0.58333333 0.58333333 0.72727273]
|
|
|
|
mean value: 0.5939144189144189
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.51
|
|
|
|
Accuracy on Blind test: 0.84
|
|
|
|
Model_name: Extra Tree
|
|
Model func: ExtraTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.00993848 0.00880766 0.00879192 0.00975966 0.00946617 0.00881648
|
|
0.00873446 0.00887775 0.00900149 0.0090096 ]
|
|
|
|
mean value: 0.009120368957519531
|
|
|
|
key: score_time
|
|
value: [0.00890303 0.00849104 0.0085125 0.00915837 0.00863862 0.00868106
|
|
0.0084877 0.0085032 0.00851893 0.00912261]
|
|
|
|
mean value: 0.008701705932617187
|
|
|
|
key: test_mcc
|
|
value: [ 0.52414242 0.10050378 0.47777778 -0.15555556 0.47777778 0.57777778
|
|
0.26257545 -0.05555556 0.4719399 0.16854997]
|
|
|
|
mean value: 0.28499337452636414
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.75 0.55 0.73684211 0.42105263 0.73684211 0.78947368
|
|
0.63157895 0.47368421 0.73684211 0.57894737]
|
|
|
|
mean value: 0.6405263157894737
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.7826087 0.57142857 0.73684211 0.42105263 0.73684211 0.77777778
|
|
0.69565217 0.5 0.76190476 0.55555556]
|
|
|
|
mean value: 0.6539664378337147
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.69230769 0.54545455 0.7 0.4 0.7 0.77777778
|
|
0.61538462 0.5 0.72727273 0.625 ]
|
|
|
|
mean value: 0.6283197358197358
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.9 0.6 0.77777778 0.44444444 0.77777778 0.77777778
|
|
0.8 0.5 0.8 0.5 ]
|
|
|
|
mean value: 0.6877777777777778
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.75 0.55 0.73888889 0.42222222 0.73888889 0.78888889
|
|
0.62222222 0.47222222 0.73333333 0.58333333]
|
|
|
|
mean value: 0.64
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.64285714 0.4 0.58333333 0.26666667 0.58333333 0.63636364
|
|
0.53333333 0.33333333 0.61538462 0.38461538]
|
|
|
|
mean value: 0.49792207792207793
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.19
|
|
|
|
Accuracy on Blind test: 0.69
|
|
|
|
Model_name: Random Forest
|
|
Model func: RandomForestClassifier(n_estimators=1000, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(n_estimators=1000, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.29432726 1.19392848 1.19852495 1.19805241 1.21901321 1.19706225
|
|
1.19417572 1.19714236 1.21871495 1.20845175]
|
|
|
|
mean value: 1.2119393348693848
|
|
|
|
key: score_time
|
|
value: [0.08806133 0.09488583 0.08754897 0.09035516 0.09043193 0.08836198
|
|
0.08861899 0.08783913 0.09165597 0.14491606]
|
|
|
|
mean value: 0.09526753425598145
|
|
|
|
key: test_mcc
|
|
value: [0.50251891 0.57735027 0.78888889 0.36666667 0.57777778 0.68888889
|
|
0.89893315 0.68543653 0.4719399 0.9 ]
|
|
|
|
mean value: 0.6458400979554342
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.75 0.75 0.89473684 0.68421053 0.78947368 0.84210526
|
|
0.94736842 0.84210526 0.73684211 0.94736842]
|
|
|
|
mean value: 0.8184210526315789
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.76190476 0.66666667 0.88888889 0.66666667 0.77777778 0.84210526
|
|
0.95238095 0.85714286 0.76190476 0.94736842]
|
|
|
|
mean value: 0.8122807017543859
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.72727273 1. 0.88888889 0.66666667 0.77777778 0.8
|
|
0.90909091 0.81818182 0.72727273 1. ]
|
|
|
|
mean value: 0.8315151515151515
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.8 0.5 0.88888889 0.66666667 0.77777778 0.88888889
|
|
1. 0.9 0.8 0.9 ]
|
|
|
|
mean value: 0.8122222222222222
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.75 0.75 0.89444444 0.68333333 0.78888889 0.84444444
|
|
0.94444444 0.83888889 0.73333333 0.95 ]
|
|
|
|
mean value: 0.8177777777777777
|
|
|
|
key: train_roc_auc
|
|
value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.61538462 0.5 0.8 0.5 0.63636364 0.72727273
|
|
0.90909091 0.75 0.61538462 0.9 ]
|
|
|
|
mean value: 0.6953496503496504
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.66
|
|
|
|
Accuracy on Blind test: 0.89
|
|
|
|
Model_name: Random Forest2
|
|
Model func: RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000...05', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10,
|
|
oob_score=True, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.84694982 0.8641293 0.88063264 0.89809251 0.88324881 0.89919996
|
|
0.88049603 0.87666178 0.96809459 0.94219756]
|
|
|
|
mean value: 0.8939702987670899
|
|
|
|
key: score_time
|
|
value: [0.19970918 0.22537541 0.18459272 0.16587257 0.24372816 0.14228034
|
|
0.16846371 0.21508193 0.12721968 0.2044332 ]
|
|
|
|
mean value: 0.18767569065093995
|
|
|
|
key: test_mcc
|
|
value: [0.50251891 0.57735027 0.9 0.47777778 0.68543653 0.68888889
|
|
0.78888889 0.68543653 0.57777778 0.9 ]
|
|
|
|
mean value: 0.6784075563827824
|
|
|
|
key: train_mcc
|
|
value: [0.93023256 0.94192415 0.95375568 0.93063352 0.93063352 0.91913927
|
|
0.93063352 0.9308916 0.93063352 0.93063352]
|
|
|
|
mean value: 0.9329110853822355
|
|
|
|
key: test_accuracy
|
|
value: [0.75 0.75 0.94736842 0.73684211 0.84210526 0.84210526
|
|
0.89473684 0.84210526 0.78947368 0.94736842]
|
|
|
|
mean value: 0.8342105263157894
|
|
|
|
key: train_accuracy
|
|
value: [0.96511628 0.97093023 0.97687861 0.96531792 0.96531792 0.95953757
|
|
0.96531792 0.96531792 0.96531792 0.96531792]
|
|
|
|
mean value: 0.9664370211049872
|
|
|
|
key: test_fscore
|
|
value: [0.76190476 0.66666667 0.94736842 0.73684211 0.82352941 0.84210526
|
|
0.9 0.85714286 0.8 0.94736842]
|
|
|
|
mean value: 0.8282927908005308
|
|
|
|
key: train_fscore
|
|
value: [0.96511628 0.97076023 0.97701149 0.96551724 0.96551724 0.95953757
|
|
0.96511628 0.96551724 0.96511628 0.96511628]
|
|
|
|
mean value: 0.9664326140842339
|
|
|
|
key: test_precision
|
|
value: [0.72727273 1. 0.9 0.7 0.875 0.8
|
|
0.9 0.81818182 0.8 1. ]
|
|
|
|
mean value: 0.8520454545454546
|
|
|
|
key: train_precision
|
|
value: [0.96511628 0.97647059 0.97701149 0.96551724 0.96551724 0.96511628
|
|
0.96511628 0.95454545 0.96511628 0.96511628]
|
|
|
|
mean value: 0.9664643415141081
|
|
|
|
key: test_recall
|
|
value: [0.8 0.5 1. 0.77777778 0.77777778 0.88888889
|
|
0.9 0.9 0.8 0.9 ]
|
|
|
|
mean value: 0.8244444444444444
|
|
|
|
key: train_recall
|
|
value: [0.96511628 0.96511628 0.97701149 0.96551724 0.96551724 0.95402299
|
|
0.96511628 0.97674419 0.96511628 0.96511628]
|
|
|
|
mean value: 0.966439454691259
|
|
|
|
key: test_roc_auc
|
|
value: [0.75 0.75 0.95 0.73888889 0.83888889 0.84444444
|
|
0.89444444 0.83888889 0.78888889 0.95 ]
|
|
|
|
mean value: 0.8344444444444444
|
|
|
|
key: train_roc_auc
|
|
value: [0.96511628 0.97093023 0.97687784 0.96531676 0.96531676 0.95956963
|
|
0.96531676 0.96538359 0.96531676 0.96531676]
|
|
|
|
mean value: 0.9664461373964182
|
|
|
|
key: test_jcc
|
|
value: [0.61538462 0.5 0.9 0.58333333 0.7 0.72727273
|
|
0.81818182 0.75 0.66666667 0.9 ]
|
|
|
|
mean value: 0.7160839160839161
|
|
|
|
key: train_jcc
|
|
value: [0.93258427 0.94318182 0.95505618 0.93333333 0.93333333 0.92222222
|
|
0.93258427 0.93333333 0.93258427 0.93258427]
|
|
|
|
mean value: 0.9350797298831006
|
|
|
|
MCC on Blind test: 0.74
|
|
|
|
Accuracy on Blind test: 0.92
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01210046 0.00914764 0.00972962 0.00895143 0.008744 0.00893521
|
|
0.00953317 0.0088737 0.00896764 0.00890923]
|
|
|
|
mean value: 0.009389209747314452
|
|
|
|
key: score_time
|
|
value: [0.0097425 0.00875807 0.00896716 0.00848508 0.00848126 0.00850248
|
|
0.00859761 0.00863433 0.00853014 0.00861716]
|
|
|
|
mean value: 0.008731579780578614
|
|
|
|
key: test_mcc
|
|
value: [ 0.40824829 0.52414242 0.4719399 0.1495142 0.25844328 -0.19096397
|
|
0.47777778 0.68543653 0.19096397 0.64450339]
|
|
|
|
mean value: 0.36200057889350745
|
|
|
|
key: train_mcc
|
|
value: [0.61153541 0.62944075 0.57917855 0.6670714 0.56139665 0.58453737
|
|
0.64660058 0.63256178 0.621633 0.63405536]
|
|
|
|
mean value: 0.6168010848431563
|
|
|
|
key: test_accuracy
|
|
value: [0.7 0.75 0.73684211 0.57894737 0.63157895 0.42105263
|
|
0.73684211 0.84210526 0.57894737 0.78947368]
|
|
|
|
mean value: 0.6765789473684211
|
|
|
|
key: train_accuracy
|
|
value: [0.79651163 0.81395349 0.78612717 0.83236994 0.78034682 0.79190751
|
|
0.82080925 0.8150289 0.80924855 0.8150289 ]
|
|
|
|
mean value: 0.8061332168302191
|
|
|
|
key: test_fscore
|
|
value: [0.72727273 0.70588235 0.70588235 0.5 0.58823529 0.26666667
|
|
0.73684211 0.85714286 0.5 0.75 ]
|
|
|
|
mean value: 0.6337924356345409
|
|
|
|
key: train_fscore
|
|
value: [0.76821192 0.80722892 0.77018634 0.82634731 0.77647059 0.78823529
|
|
0.80745342 0.80487805 0.79754601 0.80246914]
|
|
|
|
mean value: 0.7949026972340305
|
|
|
|
key: test_precision
|
|
value: [0.66666667 0.85714286 0.75 0.57142857 0.625 0.33333333
|
|
0.77777778 0.81818182 0.66666667 1. ]
|
|
|
|
mean value: 0.7066197691197691
|
|
|
|
key: train_precision
|
|
value: [0.89230769 0.8375 0.83783784 0.8625 0.79518072 0.80722892
|
|
0.86666667 0.84615385 0.84415584 0.85526316]
|
|
|
|
mean value: 0.8444794683570841
|
|
|
|
key: test_recall
|
|
value: [0.8 0.6 0.66666667 0.44444444 0.55555556 0.22222222
|
|
0.7 0.9 0.4 0.6 ]
|
|
|
|
mean value: 0.5888888888888889
|
|
|
|
key: train_recall
|
|
value: [0.6744186 0.77906977 0.71264368 0.79310345 0.75862069 0.77011494
|
|
0.75581395 0.76744186 0.75581395 0.75581395]
|
|
|
|
mean value: 0.7522854851643945
|
|
|
|
key: test_roc_auc
|
|
value: [0.7 0.75 0.73333333 0.57222222 0.62777778 0.41111111
|
|
0.73888889 0.83888889 0.58888889 0.8 ]
|
|
|
|
mean value: 0.6761111111111111
|
|
|
|
key: train_roc_auc
|
|
value: [0.79651163 0.81395349 0.7865544 0.83259824 0.78047314 0.79203422
|
|
0.82043571 0.81475541 0.80894146 0.81468859]
|
|
|
|
mean value: 0.8060946271050521
|
|
|
|
key: test_jcc
|
|
value: [0.57142857 0.54545455 0.54545455 0.33333333 0.41666667 0.15384615
|
|
0.58333333 0.75 0.33333333 0.6 ]
|
|
|
|
mean value: 0.48328504828504826
|
|
|
|
key: train_jcc
|
|
value: [0.62365591 0.67676768 0.62626263 0.70408163 0.63461538 0.65048544
|
|
0.67708333 0.67346939 0.66326531 0.67010309]
|
|
|
|
mean value: 0.6599789791164837
|
|
|
|
MCC on Blind test: 0.39
|
|
|
|
Accuracy on Blind test: 0.76
|
|
|
|
Model_name: XGBoost
|
|
Model func: XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000...
|
|
interaction_constraints=None, learning_rate=None,
|
|
max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan,
|
|
monotone_constraints=None, n_estimators=100,
|
|
n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None,
|
|
reg_lambda=None, scale_pos_weight=None,
|
|
subsample=None, tree_method=None,
|
|
use_label_encoder=False,
|
|
validate_parameters=None, verbosity=0))])
|
|
|
|
key: fit_time
|
|
value: [0.54011297 0.37033105 0.04701519 0.04988456 0.0512135 0.05416465
|
|
0.05368471 0.11100507 0.2043345 1.01194453]
|
|
|
|
mean value: 0.24936907291412352
|
|
|
|
key: score_time
|
|
value: [0.01697421 0.0116744 0.01045799 0.010432 0.01109362 0.01025105
|
|
0.01184797 0.01086235 0.01093817 0.01072168]
|
|
|
|
mean value: 0.011525344848632813
|
|
|
|
key: test_mcc
|
|
value: [0.70352647 0.61237244 0.57777778 0.9 0.80507649 0.68888889
|
|
0.78888889 0.78888889 0.58655573 0.80903983]
|
|
|
|
mean value: 0.7261015397117992
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.85 0.8 0.78947368 0.94736842 0.89473684 0.84210526
|
|
0.89473684 0.89473684 0.78947368 0.89473684]
|
|
|
|
mean value: 0.8597368421052631
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.84210526 0.77777778 0.77777778 0.94736842 0.875 0.84210526
|
|
0.9 0.9 0.81818182 0.88888889]
|
|
|
|
mean value: 0.8569205209994684
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.88888889 0.875 0.77777778 0.9 1. 0.8
|
|
0.9 0.9 0.75 1. ]
|
|
|
|
mean value: 0.8791666666666667
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.8 0.7 0.77777778 1. 0.77777778 0.88888889
|
|
0.9 0.9 0.9 0.8 ]
|
|
|
|
mean value: 0.8444444444444444
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.85 0.8 0.78888889 0.95 0.88888889 0.84444444
|
|
0.89444444 0.89444444 0.78333333 0.9 ]
|
|
|
|
mean value: 0.8594444444444445
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.72727273 0.63636364 0.63636364 0.9 0.77777778 0.72727273
|
|
0.81818182 0.81818182 0.69230769 0.8 ]
|
|
|
|
mean value: 0.7533721833721834
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.71
|
|
|
|
Accuracy on Blind test: 0.92
|
|
|
|
Model_name: LDA
|
|
Model func: LinearDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LinearDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.04340696 0.02456737 0.04545736 0.09076643 0.04470468 0.02501273
|
|
0.03898215 0.06944561 0.06575561 0.06470251]
|
|
|
|
mean value: 0.05128014087677002
|
|
|
|
key: score_time
|
|
value: [0.01323032 0.01192713 0.02391839 0.0202477 0.01200914 0.01202369
|
|
0.02298069 0.02384233 0.02568412 0.02394533]
|
|
|
|
mean value: 0.018980884552001955
|
|
|
|
key: test_mcc
|
|
value: [0.52414242 0.40824829 0.36666667 0.47777778 0.68543653 0.28752732
|
|
0.47777778 0.64450339 0.28752732 0.64450339]
|
|
|
|
mean value: 0.4804110863940948
|
|
|
|
key: train_mcc
|
|
value: [0.95348837 0.96518153 0.97687784 0.95375568 0.97687784 0.95375568
|
|
0.95375568 0.95375568 0.98850575 0.95375568]
|
|
|
|
mean value: 0.9629709732530025
|
|
|
|
key: test_accuracy
|
|
value: [0.75 0.7 0.68421053 0.73684211 0.84210526 0.63157895
|
|
0.73684211 0.78947368 0.63157895 0.78947368]
|
|
|
|
mean value: 0.7292105263157894
|
|
|
|
key: train_accuracy
|
|
value: [0.97674419 0.98255814 0.98843931 0.97687861 0.98843931 0.97687861
|
|
0.97687861 0.97687861 0.99421965 0.97687861]
|
|
|
|
mean value: 0.9814793655061164
|
|
|
|
key: test_fscore
|
|
value: [0.7826087 0.66666667 0.66666667 0.73684211 0.82352941 0.66666667
|
|
0.73684211 0.75 0.58823529 0.75 ]
|
|
|
|
mean value: 0.7168057612060842
|
|
|
|
key: train_fscore
|
|
value: [0.97674419 0.98265896 0.98850575 0.97701149 0.98850575 0.97701149
|
|
0.97674419 0.97674419 0.99421965 0.97674419]
|
|
|
|
mean value: 0.981488983966143
|
|
|
|
key: test_precision
|
|
value: [0.69230769 0.75 0.66666667 0.7 0.875 0.58333333
|
|
0.77777778 1. 0.71428571 1. ]
|
|
|
|
mean value: 0.7759371184371184
|
|
|
|
key: train_precision
|
|
value: [0.97674419 0.97701149 0.98850575 0.97701149 0.98850575 0.97701149
|
|
0.97674419 0.97674419 0.98850575 0.97674419]
|
|
|
|
mean value: 0.9803528468323978
|
|
|
|
key: test_recall
|
|
value: [0.9 0.6 0.66666667 0.77777778 0.77777778 0.77777778
|
|
0.7 0.6 0.5 0.6 ]
|
|
|
|
mean value: 0.69
|
|
|
|
key: train_recall
|
|
value: [0.97674419 0.98837209 0.98850575 0.97701149 0.98850575 0.97701149
|
|
0.97674419 0.97674419 1. 0.97674419]
|
|
|
|
mean value: 0.9826383319967923
|
|
|
|
key: test_roc_auc
|
|
value: [0.75 0.7 0.68333333 0.73888889 0.83888889 0.63888889
|
|
0.73888889 0.8 0.63888889 0.8 ]
|
|
|
|
mean value: 0.7327777777777778
|
|
|
|
key: train_roc_auc
|
|
value: [0.97674419 0.98255814 0.98843892 0.97687784 0.98843892 0.97687784
|
|
0.97687784 0.97687784 0.99425287 0.97687784]
|
|
|
|
mean value: 0.9814822240042769
|
|
|
|
key: test_jcc
|
|
value: [0.64285714 0.5 0.5 0.58333333 0.7 0.5
|
|
0.58333333 0.6 0.41666667 0.6 ]
|
|
|
|
mean value: 0.5626190476190476
|
|
|
|
key: train_jcc
|
|
value: [0.95454545 0.96590909 0.97727273 0.95505618 0.97727273 0.95505618
|
|
0.95454545 0.95454545 0.98850575 0.95454545]
|
|
|
|
mean value: 0.9637254470313362
|
|
|
|
MCC on Blind test: 0.44
|
|
|
|
Accuracy on Blind test: 0.79
|
|
|
|
Model_name: Multinomial
|
|
Model func: MultinomialNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MultinomialNB())])
|
|
|
|
key: fit_time
|
|
value: [0.02351999 0.00953579 0.01062107 0.01079059 0.00973964 0.00980783
|
|
0.00928116 0.00896358 0.00895929 0.00897264]
|
|
|
|
mean value: 0.011019158363342284
|
|
|
|
key: score_time
|
|
value: [0.01031256 0.00982332 0.01004648 0.00993109 0.00895858 0.00953817
|
|
0.0095973 0.00861597 0.00859809 0.00856042]
|
|
|
|
mean value: 0.009398198127746582
|
|
|
|
key: test_mcc
|
|
value: [0.40824829 0.43643578 0.80903983 0.4719399 0.57777778 0.57777778
|
|
0.48934516 0.80903983 0.50604808 0.41773368]
|
|
|
|
mean value: 0.5503386117428462
|
|
|
|
key: train_mcc
|
|
value: [0.54654858 0.53502842 0.59885202 0.54953875 0.5975094 0.64388407
|
|
0.54918471 0.58380112 0.65377468 0.61939324]
|
|
|
|
mean value: 0.5877514988740151
|
|
|
|
key: test_accuracy
|
|
value: [0.7 0.7 0.89473684 0.73684211 0.78947368 0.78947368
|
|
0.73684211 0.89473684 0.73684211 0.68421053]
|
|
|
|
mean value: 0.7663157894736842
|
|
|
|
key: train_accuracy
|
|
value: [0.77325581 0.76744186 0.79768786 0.77456647 0.79768786 0.82080925
|
|
0.77456647 0.79190751 0.8265896 0.80924855]
|
|
|
|
mean value: 0.7933761258233634
|
|
|
|
key: test_fscore
|
|
value: [0.72727273 0.625 0.9 0.70588235 0.77777778 0.77777778
|
|
0.7826087 0.88888889 0.70588235 0.625 ]
|
|
|
|
mean value: 0.7516090573251698
|
|
|
|
key: train_fscore
|
|
value: [0.77192982 0.77011494 0.78787879 0.77192982 0.79041916 0.81437126
|
|
0.77456647 0.79069767 0.82142857 0.80239521]
|
|
|
|
mean value: 0.789573172810846
|
|
|
|
key: test_precision
|
|
value: [0.66666667 0.83333333 0.81818182 0.75 0.77777778 0.77777778
|
|
0.69230769 1. 0.85714286 0.83333333]
|
|
|
|
mean value: 0.8006521256521256
|
|
|
|
key: train_precision
|
|
value: [0.77647059 0.76136364 0.83333333 0.78571429 0.825 0.85
|
|
0.77011494 0.79069767 0.84146341 0.82716049]
|
|
|
|
mean value: 0.8061318369055197
|
|
|
|
key: test_recall
|
|
value: [0.8 0.5 1. 0.66666667 0.77777778 0.77777778
|
|
0.9 0.8 0.6 0.5 ]
|
|
|
|
mean value: 0.7322222222222222
|
|
|
|
key: train_recall
|
|
value: [0.76744186 0.77906977 0.74712644 0.75862069 0.75862069 0.7816092
|
|
0.77906977 0.79069767 0.80232558 0.77906977]
|
|
|
|
mean value: 0.7743651430098905
|
|
|
|
key: test_roc_auc
|
|
value: [0.7 0.7 0.9 0.73333333 0.78888889 0.78888889
|
|
0.72777778 0.9 0.74444444 0.69444444]
|
|
|
|
mean value: 0.7677777777777778
|
|
|
|
key: train_roc_auc
|
|
value: [0.77325581 0.76744186 0.79798182 0.77465918 0.797915 0.82103716
|
|
0.77459235 0.79190056 0.82645015 0.80907511]
|
|
|
|
mean value: 0.7934309008286554
|
|
|
|
key: test_jcc
|
|
value: [0.57142857 0.45454545 0.81818182 0.54545455 0.63636364 0.63636364
|
|
0.64285714 0.8 0.54545455 0.45454545]
|
|
|
|
mean value: 0.6105194805194805
|
|
|
|
key: train_jcc
|
|
value: [0.62857143 0.62616822 0.65 0.62857143 0.65346535 0.68686869
|
|
0.63207547 0.65384615 0.6969697 0.67 ]
|
|
|
|
mean value: 0.6526536437359227
|
|
|
|
MCC on Blind test: 0.53
|
|
|
|
Accuracy on Blind test: 0.82
|
|
|
|
Model_name: Passive Aggresive
|
|
Model func: PassiveAggressiveClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
PassiveAggressiveClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01055431 0.01448035 0.01345754 0.01367259 0.01554561 0.01567435
|
|
0.01575613 0.01506877 0.01487374 0.01496863]
|
|
|
|
mean value: 0.014405202865600587
|
|
|
|
key: score_time
|
|
value: [0.00875115 0.01127553 0.01121569 0.01178217 0.01153636 0.01154399
|
|
0.01148987 0.01157165 0.01159549 0.01154137]
|
|
|
|
mean value: 0.01123032569885254
|
|
|
|
key: test_mcc
|
|
value: [0.50251891 0.57735027 0.50604808 0.4719399 0.57777778 0.4719399
|
|
0.48934516 0.68888889 0.41773368 0.72456884]
|
|
|
|
mean value: 0.5428111405544918
|
|
|
|
key: train_mcc
|
|
value: [0.80499665 0.80897033 0.7632449 0.82802537 0.89600322 0.80619539
|
|
0.83346839 0.85024743 0.7891276 0.78622674]
|
|
|
|
mean value: 0.8166506000917169
|
|
|
|
key: test_accuracy
|
|
value: [0.75 0.75 0.73684211 0.73684211 0.78947368 0.73684211
|
|
0.73684211 0.84210526 0.68421053 0.84210526]
|
|
|
|
mean value: 0.7605263157894737
|
|
|
|
key: train_accuracy
|
|
value: [0.90116279 0.90116279 0.87861272 0.9132948 0.94797688 0.9017341
|
|
0.9132948 0.92485549 0.89017341 0.88439306]
|
|
|
|
mean value: 0.9056660841510956
|
|
|
|
key: test_fscore
|
|
value: [0.76190476 0.66666667 0.76190476 0.70588235 0.77777778 0.70588235
|
|
0.7826087 0.84210526 0.625 0.82352941]
|
|
|
|
mean value: 0.7453262044711095
|
|
|
|
key: train_fscore
|
|
value: [0.8969697 0.89440994 0.88648649 0.9112426 0.94857143 0.89820359
|
|
0.91803279 0.92571429 0.88050314 0.89473684]
|
|
|
|
mean value: 0.9054870805639361
|
|
|
|
key: test_precision
|
|
value: [0.72727273 1. 0.66666667 0.75 0.77777778 0.75
|
|
0.69230769 0.88888889 0.83333333 1. ]
|
|
|
|
mean value: 0.8086247086247086
|
|
|
|
key: train_precision
|
|
value: [0.93670886 0.96 0.83673469 0.93902439 0.94318182 0.9375
|
|
0.86597938 0.91011236 0.95890411 0.81730769]
|
|
|
|
mean value: 0.9105453305953359
|
|
|
|
key: test_recall
|
|
value: [0.8 0.5 0.88888889 0.66666667 0.77777778 0.66666667
|
|
0.9 0.8 0.5 0.7 ]
|
|
|
|
mean value: 0.72
|
|
|
|
key: train_recall
|
|
value: [0.86046512 0.8372093 0.94252874 0.88505747 0.95402299 0.86206897
|
|
0.97674419 0.94186047 0.81395349 0.98837209]
|
|
|
|
mean value: 0.9062282812082331
|
|
|
|
key: test_roc_auc
|
|
value: [0.75 0.75 0.74444444 0.73333333 0.78888889 0.73333333
|
|
0.72777778 0.84444444 0.69444444 0.85 ]
|
|
|
|
mean value: 0.7616666666666666
|
|
|
|
key: train_roc_auc
|
|
value: [0.90116279 0.90116279 0.87824111 0.91345897 0.94794173 0.90196472
|
|
0.91365945 0.92495322 0.88973536 0.88499064]
|
|
|
|
mean value: 0.9057270783213045
|
|
|
|
key: test_jcc
|
|
value: [0.61538462 0.5 0.61538462 0.54545455 0.63636364 0.54545455
|
|
0.64285714 0.72727273 0.45454545 0.7 ]
|
|
|
|
mean value: 0.5982717282717283
|
|
|
|
key: train_jcc
|
|
value: [0.81318681 0.80898876 0.7961165 0.83695652 0.90217391 0.81521739
|
|
0.84848485 0.86170213 0.78651685 0.80952381]
|
|
|
|
mean value: 0.8278867547773899
|
|
|
|
MCC on Blind test: 0.45
|
|
|
|
Accuracy on Blind test: 0.76
|
|
|
|
Model_name: Stochastic GDescent
|
|
Model func: SGDClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SGDClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01445127 0.01382184 0.01403117 0.01433516 0.01519608 0.01456809
|
|
0.0155673 0.01613855 0.01425195 0.01301265]
|
|
|
|
mean value: 0.014537405967712403
|
|
|
|
key: score_time
|
|
value: [0.01183462 0.01153708 0.01150513 0.01158881 0.01170182 0.01156878
|
|
0.01153612 0.01171589 0.01162171 0.01153803]
|
|
|
|
mean value: 0.011614799499511719
|
|
|
|
key: test_mcc
|
|
value: [0.50251891 0.34641016 0.58655573 0.48934516 0.48989795 0.47777778
|
|
0.45643546 0.68888889 0.68543653 0.41079192]
|
|
|
|
mean value: 0.5134058483288774
|
|
|
|
key: train_mcc
|
|
value: [0.66069587 0.76104239 0.75313123 0.69087173 0.61429087 0.83834744
|
|
0.72447934 0.86329252 0.84390411 0.49744201]
|
|
|
|
mean value: 0.7247497504621656
|
|
|
|
key: test_accuracy
|
|
value: [0.75 0.65 0.78947368 0.73684211 0.68421053 0.73684211
|
|
0.68421053 0.84210526 0.84210526 0.63157895]
|
|
|
|
mean value: 0.7347368421052631
|
|
|
|
key: train_accuracy
|
|
value: [0.80813953 0.87209302 0.87283237 0.8265896 0.77456647 0.91907514
|
|
0.84393064 0.93063584 0.91907514 0.69942197]
|
|
|
|
mean value: 0.8466359725769592
|
|
|
|
key: test_fscore
|
|
value: [0.73684211 0.53333333 0.75 0.66666667 0.75 0.73684211
|
|
0.76923077 0.84210526 0.85714286 0.46153846]
|
|
|
|
mean value: 0.7103701561596298
|
|
|
|
key: train_fscore
|
|
value: [0.76595745 0.85714286 0.86419753 0.79452055 0.81690141 0.92045455
|
|
0.86432161 0.92771084 0.92307692 0.56666667]
|
|
|
|
mean value: 0.8300950377823305
|
|
|
|
key: test_precision
|
|
value: [0.77777778 0.8 0.85714286 0.83333333 0.6 0.7
|
|
0.625 0.88888889 0.81818182 1. ]
|
|
|
|
mean value: 0.7900324675324675
|
|
|
|
key: train_precision
|
|
value: [0.98181818 0.97058824 0.93333333 0.98305085 0.69047619 0.91011236
|
|
0.76106195 0.9625 0.875 1. ]
|
|
|
|
mean value: 0.9067941094832667
|
|
|
|
key: test_recall
|
|
value: [0.7 0.4 0.66666667 0.55555556 1. 0.77777778
|
|
1. 0.8 0.9 0.3 ]
|
|
|
|
mean value: 0.71
|
|
|
|
key: train_recall
|
|
value: [0.62790698 0.76744186 0.8045977 0.66666667 1. 0.93103448
|
|
1. 0.89534884 0.97674419 0.39534884]
|
|
|
|
mean value: 0.8065089548249131
|
|
|
|
key: test_roc_auc
|
|
value: [0.75 0.65 0.78333333 0.72777778 0.7 0.73888889
|
|
0.66666667 0.84444444 0.83888889 0.65 ]
|
|
|
|
mean value: 0.735
|
|
|
|
key: train_roc_auc
|
|
value: [0.80813953 0.87209302 0.87322908 0.82751938 0.77325581 0.91900561
|
|
0.84482759 0.93043304 0.91940658 0.69767442]
|
|
|
|
mean value: 0.84655840684309
|
|
|
|
key: test_jcc
|
|
value: [0.58333333 0.36363636 0.6 0.5 0.6 0.58333333
|
|
0.625 0.72727273 0.75 0.3 ]
|
|
|
|
mean value: 0.5632575757575757
|
|
|
|
key: train_jcc
|
|
value: [0.62068966 0.75 0.76086957 0.65909091 0.69047619 0.85263158
|
|
0.76106195 0.86516854 0.85714286 0.39534884]
|
|
|
|
mean value: 0.721248007948493
|
|
|
|
MCC on Blind test: 0.46
|
|
|
|
Accuracy on Blind test: 0.8
|
|
|
|
Model_name: AdaBoost Classifier
|
|
Model func: AdaBoostClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', AdaBoostClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.11968398 0.10919642 0.10735869 0.10441184 0.10473728 0.10478425
|
|
0.10734653 0.11341166 0.11295152 0.10614848]
|
|
|
|
mean value: 0.10900306701660156
|
|
|
|
key: score_time
|
|
value: [0.01451898 0.0153327 0.01468682 0.01482487 0.01478434 0.01572394
|
|
0.01610613 0.0163517 0.01478457 0.01499701]
|
|
|
|
mean value: 0.015211105346679688
|
|
|
|
key: test_mcc
|
|
value: [0.8 0.43643578 0.41773368 0.57777778 0.68888889 0.80903983
|
|
0.72456884 0.68543653 0.59554321 0.80903983]
|
|
|
|
mean value: 0.6544464369563067
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.9 0.7 0.68421053 0.78947368 0.84210526 0.89473684
|
|
0.84210526 0.84210526 0.78947368 0.89473684]
|
|
|
|
mean value: 0.8178947368421052
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.9 0.625 0.72727273 0.77777778 0.84210526 0.9
|
|
0.82352941 0.85714286 0.77777778 0.88888889]
|
|
|
|
mean value: 0.8119494703782629
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.9 0.83333333 0.61538462 0.77777778 0.8 0.81818182
|
|
1. 0.81818182 0.875 1. ]
|
|
|
|
mean value: 0.8437859362859363
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.9 0.5 0.88888889 0.77777778 0.88888889 1.
|
|
0.7 0.9 0.7 0.8 ]
|
|
|
|
mean value: 0.8055555555555556
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.9 0.7 0.69444444 0.78888889 0.84444444 0.9
|
|
0.85 0.83888889 0.79444444 0.9 ]
|
|
|
|
mean value: 0.8211111111111111
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.81818182 0.45454545 0.57142857 0.63636364 0.72727273 0.81818182
|
|
0.7 0.75 0.63636364 0.8 ]
|
|
|
|
mean value: 0.6912337662337662
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.73
|
|
|
|
Accuracy on Blind test: 0.91
|
|
|
|
Model_name: Bagging Classifier
|
|
Model func: BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
BaggingClassifier(n_jobs=10, oob_score=True,
|
|
random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.0448029 0.03644919 0.04084063 0.04396009 0.05736589 0.04805398
|
|
0.0431807 0.03746462 0.03794098 0.05181623]
|
|
|
|
mean value: 0.044187521934509276
|
|
|
|
key: score_time
|
|
value: [0.02483177 0.01738501 0.01699328 0.03721547 0.03116655 0.02299523
|
|
0.02297664 0.02221394 0.022434 0.02576184]
|
|
|
|
mean value: 0.02439737319946289
|
|
|
|
key: test_mcc
|
|
value: [0.90453403 0.50251891 0.78888889 0.9 0.89893315 0.9
|
|
0.68888889 0.68888889 0.4719399 0.80903983]
|
|
|
|
mean value: 0.7553632496660713
|
|
|
|
key: train_mcc
|
|
value: [0.96518153 0.97674419 0.95401685 0.95401685 0.95401685 0.95375568
|
|
0.97687784 0.96589116 0.96538359 0.96537897]
|
|
|
|
mean value: 0.9631263509312175
|
|
|
|
key: test_accuracy
|
|
value: [0.95 0.75 0.89473684 0.94736842 0.94736842 0.94736842
|
|
0.84210526 0.84210526 0.73684211 0.89473684]
|
|
|
|
mean value: 0.8752631578947369
|
|
|
|
key: train_accuracy
|
|
value: [0.98255814 0.98837209 0.97687861 0.97687861 0.97687861 0.97687861
|
|
0.98843931 0.98265896 0.98265896 0.98265896]
|
|
|
|
mean value: 0.981486086839629
|
|
|
|
key: test_fscore
|
|
value: [0.94736842 0.73684211 0.88888889 0.94736842 0.94117647 0.94736842
|
|
0.84210526 0.84210526 0.76190476 0.88888889]
|
|
|
|
mean value: 0.8744016905007618
|
|
|
|
key: train_fscore
|
|
value: [0.98245614 0.98837209 0.97674419 0.97674419 0.97674419 0.97701149
|
|
0.98837209 0.98224852 0.98265896 0.98245614]
|
|
|
|
mean value: 0.9813807999388305
|
|
|
|
key: test_precision
|
|
value: [1. 0.77777778 0.88888889 0.9 1. 0.9
|
|
0.88888889 0.88888889 0.72727273 1. ]
|
|
|
|
mean value: 0.8971717171717172
|
|
|
|
key: train_precision
|
|
value: [0.98823529 0.98837209 0.98823529 0.98823529 0.98823529 0.97701149
|
|
0.98837209 1. 0.97701149 0.98823529]
|
|
|
|
mean value: 0.9871943645140494
|
|
|
|
key: test_recall
|
|
value: [0.9 0.7 0.88888889 1. 0.88888889 1.
|
|
0.8 0.8 0.8 0.8 ]
|
|
|
|
mean value: 0.8577777777777778
|
|
|
|
key: train_recall
|
|
value: [0.97674419 0.98837209 0.96551724 0.96551724 0.96551724 0.97701149
|
|
0.98837209 0.96511628 0.98837209 0.97674419]
|
|
|
|
mean value: 0.9757284148623363
|
|
|
|
key: test_roc_auc
|
|
value: [0.95 0.75 0.89444444 0.95 0.94444444 0.95
|
|
0.84444444 0.84444444 0.73333333 0.9 ]
|
|
|
|
mean value: 0.8761111111111111
|
|
|
|
key: train_roc_auc
|
|
value: [0.98255814 0.98837209 0.97694467 0.97694467 0.97694467 0.97687784
|
|
0.98843892 0.98255814 0.98269179 0.98262497]
|
|
|
|
mean value: 0.981495589414595
|
|
|
|
key: test_jcc
|
|
value: [0.9 0.58333333 0.8 0.9 0.88888889 0.9
|
|
0.72727273 0.72727273 0.61538462 0.8 ]
|
|
|
|
mean value: 0.7842152292152292
|
|
|
|
key: train_jcc
|
|
value: [0.96551724 0.97701149 0.95454545 0.95454545 0.95454545 0.95505618
|
|
0.97701149 0.96511628 0.96590909 0.96551724]
|
|
|
|
mean value: 0.9634775384654871
|
|
|
|
MCC on Blind test: 0.7
|
|
|
|
Accuracy on Blind test: 0.91
|
|
|
|
Model_name: Gaussian Process
|
|
Model func: GaussianProcessClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianProcessClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.04223371 0.05334353 0.0519073 0.05333805 0.05607939 0.05932999
|
|
0.08690262 0.10090017 0.04979682 0.05732155]
|
|
|
|
mean value: 0.06111531257629395
|
|
|
|
key: score_time
|
|
value: [0.02555871 0.02488542 0.02664733 0.02691031 0.02732468 0.02355742
|
|
0.02250624 0.02430534 0.02258253 0.02257419]
|
|
|
|
mean value: 0.024685215950012208
|
|
|
|
key: test_mcc
|
|
value: [ 0.30151134 -0.21821789 0.38204659 -0.26666667 0.25844328 0.25844328
|
|
-0.06900656 0.41773368 0.59554321 0.57777778]
|
|
|
|
mean value: 0.22376080478351695
|
|
|
|
key: train_mcc
|
|
value: [0.98843892 0.98843892 0.98850422 0.98850422 1. 1.
|
|
1. 0.98850575 1. 1. ]
|
|
|
|
mean value: 0.9942392021754873
|
|
|
|
key: test_accuracy
|
|
value: [0.65 0.4 0.68421053 0.36842105 0.63157895 0.63157895
|
|
0.47368421 0.68421053 0.78947368 0.78947368]
|
|
|
|
mean value: 0.6102631578947368
|
|
|
|
key: train_accuracy
|
|
value: [0.99418605 0.99418605 0.99421965 0.99421965 1. 1.
|
|
1. 0.99421965 1. 1. ]
|
|
|
|
mean value: 0.9971031052560828
|
|
|
|
key: test_fscore
|
|
value: [0.66666667 0.25 0.7 0.33333333 0.58823529 0.58823529
|
|
0.54545455 0.625 0.77777778 0.8 ]
|
|
|
|
mean value: 0.5874702911467617
|
|
|
|
key: train_fscore
|
|
value: [0.99421965 0.99421965 0.99428571 0.99428571 1. 1.
|
|
1. 0.99421965 1. 1. ]
|
|
|
|
mean value: 0.9971230388109
|
|
|
|
key: test_precision
|
|
value: [0.63636364 0.33333333 0.63636364 0.33333333 0.625 0.625
|
|
0.5 0.83333333 0.875 0.8 ]
|
|
|
|
mean value: 0.6197727272727273
|
|
|
|
key: train_precision
|
|
value: [0.98850575 0.98850575 0.98863636 0.98863636 1. 1.
|
|
1. 0.98850575 1. 1. ]
|
|
|
|
mean value: 0.9942789968652037
|
|
|
|
key: test_recall
|
|
value: [0.7 0.2 0.77777778 0.33333333 0.55555556 0.55555556
|
|
0.6 0.5 0.7 0.8 ]
|
|
|
|
mean value: 0.5722222222222222
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.65 0.4 0.68888889 0.36666667 0.62777778 0.62777778
|
|
0.46666667 0.69444444 0.79444444 0.78888889]
|
|
|
|
mean value: 0.6105555555555555
|
|
|
|
key: train_roc_auc
|
|
value: [0.99418605 0.99418605 0.99418605 0.99418605 1. 1.
|
|
1. 0.99425287 1. 1. ]
|
|
|
|
mean value: 0.997099705960973
|
|
|
|
key: test_jcc
|
|
value: [0.5 0.14285714 0.53846154 0.2 0.41666667 0.41666667
|
|
0.375 0.45454545 0.63636364 0.66666667]
|
|
|
|
mean value: 0.4347227772227772
|
|
|
|
key: train_jcc
|
|
value: [0.98850575 0.98850575 0.98863636 0.98863636 1. 1.
|
|
1. 0.98850575 1. 1. ]
|
|
|
|
mean value: 0.9942789968652037
|
|
|
|
MCC on Blind test: 0.3
|
|
|
|
Accuracy on Blind test: 0.74
|
|
|
|
Model_name: Gradient Boosting
|
|
Model func: GradientBoostingClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GradientBoostingClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.37226057 0.36622763 0.36311316 0.39109921 0.38581228 0.35312366
|
|
0.32957363 0.33011913 0.32138824 0.32526827]
|
|
|
|
mean value: 0.35379858016967775
|
|
|
|
key: score_time
|
|
value: [0.00946259 0.00942874 0.0094924 0.01565409 0.01205778 0.00955963
|
|
0.00943923 0.00940394 0.00933862 0.00913787]
|
|
|
|
mean value: 0.010297489166259766
|
|
|
|
key: test_mcc
|
|
value: [0.90453403 0.65465367 0.78888889 0.9 0.80507649 0.78888889
|
|
0.59554321 0.78888889 0.68888889 0.80903983]
|
|
|
|
mean value: 0.7724402792694658
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.95 0.8 0.89473684 0.94736842 0.89473684 0.89473684
|
|
0.78947368 0.89473684 0.84210526 0.89473684]
|
|
|
|
mean value: 0.8802631578947369
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.94736842 0.75 0.88888889 0.94736842 0.875 0.88888889
|
|
0.77777778 0.9 0.84210526 0.88888889]
|
|
|
|
mean value: 0.8706286549707603
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 1. 0.88888889 0.9 1. 0.88888889
|
|
0.875 0.9 0.88888889 1. ]
|
|
|
|
mean value: 0.9341666666666667
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.9 0.6 0.88888889 1. 0.77777778 0.88888889
|
|
0.7 0.9 0.8 0.8 ]
|
|
|
|
mean value: 0.8255555555555556
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.95 0.8 0.89444444 0.95 0.88888889 0.89444444
|
|
0.79444444 0.89444444 0.84444444 0.9 ]
|
|
|
|
mean value: 0.8811111111111111
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.9 0.6 0.8 0.9 0.77777778 0.8
|
|
0.63636364 0.81818182 0.72727273 0.8 ]
|
|
|
|
mean value: 0.775959595959596
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.73
|
|
|
|
Accuracy on Blind test: 0.92
|
|
|
|
Model_name: QDA
|
|
Model func: QuadraticDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', QuadraticDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.02142334 0.02192593 0.02348685 0.02317166 0.020576 0.0327661
|
|
0.02014709 0.02839041 0.02045655 0.0206368 ]
|
|
|
|
mean value: 0.023298072814941406
|
|
|
|
key: score_time
|
|
value: [0.01236153 0.01455808 0.02339935 0.01791406 0.01864529 0.0121305
|
|
0.01734853 0.01214361 0.01763248 0.01902938]
|
|
|
|
mean value: 0.016516280174255372
|
|
|
|
key: test_mcc
|
|
value: [ 0.30151134 0.5 0.19096397 -0.19096397 0.15555556 0.28752732
|
|
-0.26666667 0.47777778 -0.05555556 -0.15118579]
|
|
|
|
mean value: 0.12489639828778315
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.65 0.7 0.57894737 0.42105263 0.57894737 0.63157895
|
|
0.36842105 0.73684211 0.47368421 0.42105263]
|
|
|
|
mean value: 0.5560526315789474
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.66666667 0.57142857 0.63636364 0.26666667 0.55555556 0.66666667
|
|
0.4 0.73684211 0.5 0.26666667]
|
|
|
|
mean value: 0.5266856535277588
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.63636364 1. 0.53846154 0.33333333 0.55555556 0.58333333
|
|
0.4 0.77777778 0.5 0.4 ]
|
|
|
|
mean value: 0.5724825174825174
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.7 0.4 0.77777778 0.22222222 0.55555556 0.77777778
|
|
0.4 0.7 0.5 0.2 ]
|
|
|
|
mean value: 0.5233333333333333
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.65 0.7 0.58888889 0.41111111 0.57777778 0.63888889
|
|
0.36666667 0.73888889 0.47222222 0.43333333]
|
|
|
|
mean value: 0.5577777777777778
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.5 0.4 0.46666667 0.15384615 0.38461538 0.5
|
|
0.25 0.58333333 0.33333333 0.15384615]
|
|
|
|
mean value: 0.37256410256410255
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: -0.05
|
|
|
|
Accuracy on Blind test: 0.52
|
|
|
|
Model_name: Ridge Classifier
|
|
Model func: RidgeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.03142953 0.03767943 0.04178333 0.03538585 0.03553176 0.03384566
|
|
0.03206038 0.03371143 0.03961039 0.03351355]
|
|
|
|
mean value: 0.03545513153076172
|
|
|
|
key: score_time
|
|
value: [0.02319789 0.02398062 0.02130103 0.02195907 0.01977348 0.02230453
|
|
0.02180648 0.02132511 0.02320027 0.02219987]
|
|
|
|
mean value: 0.022104835510253905
|
|
|
|
key: test_mcc
|
|
value: [0.65465367 0.57735027 0.57777778 0.68888889 0.36666667 0.57777778
|
|
0.68888889 0.68888889 0.50604808 0.64450339]
|
|
|
|
mean value: 0.5971444292273042
|
|
|
|
key: train_mcc
|
|
value: [0.88467837 0.89540937 0.91913927 0.8846411 0.89600322 0.87337886
|
|
0.88461031 0.86147888 0.86147888 0.8843892 ]
|
|
|
|
mean value: 0.884520746103757
|
|
|
|
key: test_accuracy
|
|
value: [0.8 0.75 0.78947368 0.84210526 0.68421053 0.78947368
|
|
0.84210526 0.84210526 0.73684211 0.78947368]
|
|
|
|
mean value: 0.786578947368421
|
|
|
|
key: train_accuracy
|
|
value: [0.94186047 0.94767442 0.95953757 0.94219653 0.94797688 0.93641618
|
|
0.94219653 0.93063584 0.93063584 0.94219653]
|
|
|
|
mean value: 0.9421326791235382
|
|
|
|
key: test_fscore
|
|
value: [0.83333333 0.66666667 0.77777778 0.84210526 0.66666667 0.77777778
|
|
0.84210526 0.84210526 0.70588235 0.75 ]
|
|
|
|
mean value: 0.7704420364637082
|
|
|
|
key: train_fscore
|
|
value: [0.94047619 0.94736842 0.95953757 0.94186047 0.94857143 0.93567251
|
|
0.94117647 0.92941176 0.92941176 0.94186047]
|
|
|
|
mean value: 0.9415347057207026
|
|
|
|
key: test_precision
|
|
value: [0.71428571 1. 0.77777778 0.8 0.66666667 0.77777778
|
|
0.88888889 0.88888889 0.85714286 1. ]
|
|
|
|
mean value: 0.8371428571428572
|
|
|
|
key: train_precision
|
|
value: [0.96341463 0.95294118 0.96511628 0.95294118 0.94318182 0.95238095
|
|
0.95238095 0.94047619 0.94047619 0.94186047]
|
|
|
|
mean value: 0.9505169835169668
|
|
|
|
key: test_recall
|
|
value: [1. 0.5 0.77777778 0.88888889 0.66666667 0.77777778
|
|
0.8 0.8 0.6 0.6 ]
|
|
|
|
mean value: /home/tanu/git/LSHTM_analysis/scripts/ml/./embb_cd_8020.py:176: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
rus_CT.sort_values(by = ['test_mcc'], ascending = False, inplace = True)
|
|
/home/tanu/git/LSHTM_analysis/scripts/ml/./embb_cd_8020.py:179: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
rus_BT.sort_values(by = ['bts_mcc'], ascending = False, inplace = True)
|
|
0.741111111111111
|
|
|
|
key: train_recall
|
|
value: [0.91860465 0.94186047 0.95402299 0.93103448 0.95402299 0.91954023
|
|
0.93023256 0.91860465 0.91860465 0.94186047]
|
|
|
|
mean value: 0.9328388131515637
|
|
|
|
key: test_roc_auc
|
|
value: [0.8 0.75 0.78888889 0.84444444 0.68333333 0.78888889
|
|
0.84444444 0.84444444 0.74444444 0.8 ]
|
|
|
|
mean value: 0.7888888888888889
|
|
|
|
key: train_roc_auc
|
|
value: [0.94186047 0.94767442 0.95956963 0.94226143 0.94794173 0.9365143
|
|
0.94212777 0.93056669 0.93056669 0.9421946 ]
|
|
|
|
mean value: 0.942127773322641
|
|
|
|
key: test_jcc
|
|
value: [0.71428571 0.5 0.63636364 0.72727273 0.5 0.63636364
|
|
0.72727273 0.72727273 0.54545455 0.6 ]
|
|
|
|
mean value: 0.6314285714285715
|
|
|
|
key: train_jcc
|
|
value: [0.88764045 0.9 0.92222222 0.89010989 0.90217391 0.87912088
|
|
0.88888889 0.86813187 0.86813187 0.89010989]
|
|
|
|
mean value: 0.8896529869197187
|
|
|
|
MCC on Blind test: 0.62
|
|
|
|
Accuracy on Blind test: 0.87
|
|
|
|
Model_name: Ridge ClassifierCV
|
|
Model func: RidgeClassifierCV(cv=10)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifierCV(cv=10))])
|
|
|
|
key: fit_time
|
|
value: [0.43619275 0.37437773 0.40935063 0.30936551 0.25046778 0.28681684
|
|
0.27543068 0.32021427 0.41071773 0.42350101]
|
|
|
|
mean value: 0.34964349269866946
|
|
|
|
key: score_time
|
|
value: [0.02428436 0.02053189 0.01183033 0.02394629 0.01907516 0.0117991
|
|
0.02131462 0.02312541 0.02282095 0.0234828 ]
|
|
|
|
mean value: 0.02022109031677246
|
|
|
|
key: test_mcc
|
|
value: [0.50251891 0.52414242 0.68888889 0.26666667 0.36666667 0.57777778
|
|
0.68543653 0.80903983 0.50604808 0.72456884]
|
|
|
|
mean value: 0.5651754601944616
|
|
|
|
key: train_mcc
|
|
value: [0.77268329 0.77954422 0.75856599 0.76966317 0.89600322 0.87337886
|
|
0.75959752 0.75840517 0.86147888 0.78076537]
|
|
|
|
mean value: 0.8010085689776837
|
|
|
|
key: test_accuracy
|
|
value: [0.75 0.75 0.84210526 0.63157895 0.68421053 0.78947368
|
|
0.84210526 0.89473684 0.73684211 0.84210526]
|
|
|
|
mean value: 0.7763157894736842
|
|
|
|
key: train_accuracy
|
|
value: [0.88372093 0.88953488 0.87861272 0.88439306 0.94797688 0.93641618
|
|
0.87861272 0.87861272 0.93063584 0.89017341]
|
|
|
|
mean value: 0.8998689339965049
|
|
|
|
key: test_fscore
|
|
value: [0.76190476 0.70588235 0.84210526 0.63157895 0.66666667 0.77777778
|
|
0.85714286 0.88888889 0.70588235 0.82352941]
|
|
|
|
mean value: 0.7661359280554327
|
|
|
|
key: train_fscore
|
|
value: [0.87654321 0.88757396 0.87573964 0.88235294 0.94857143 0.93567251
|
|
0.87272727 0.8742515 0.92941176 0.88757396]
|
|
|
|
mean value: 0.8970418202647966
|
|
|
|
key: test_precision
|
|
value: [0.72727273 0.85714286 0.8 0.6 0.66666667 0.77777778
|
|
0.81818182 1. 0.85714286 1. ]
|
|
|
|
mean value: 0.8104184704184704
|
|
|
|
key: train_precision
|
|
value: [0.93421053 0.90361446 0.90243902 0.90361446 0.94318182 0.95238095
|
|
0.91139241 0.90123457 0.94047619 0.90361446]
|
|
|
|
mean value: 0.9196158858203496
|
|
|
|
key: test_recall
|
|
value: [0.8 0.6 0.88888889 0.66666667 0.66666667 0.77777778
|
|
0.9 0.8 0.6 0.7 ]
|
|
|
|
mean value: 0.74
|
|
|
|
key: train_recall
|
|
value: [0.8255814 0.87209302 0.85057471 0.86206897 0.95402299 0.91954023
|
|
0.8372093 0.84883721 0.91860465 0.87209302]
|
|
|
|
mean value: 0.8760625501202887
|
|
|
|
key: test_roc_auc
|
|
value: [0.75 0.75 0.84444444 0.63333333 0.68333333 0.78888889
|
|
0.83888889 0.9 0.74444444 0.85 ]
|
|
|
|
mean value: 0.7783333333333333
|
|
|
|
key: train_roc_auc
|
|
value: [0.88372093 0.88953488 0.87877573 0.88452285 0.94794173 0.9365143
|
|
0.87837477 0.87844159 0.93056669 0.8900695 ]
|
|
|
|
mean value: 0.899846297781342
|
|
|
|
key: test_jcc
|
|
value: [0.61538462 0.54545455 0.72727273 0.46153846 0.5 0.63636364
|
|
0.75 0.8 0.54545455 0.7 ]
|
|
|
|
mean value: 0.6281468531468531
|
|
|
|
key: train_jcc
|
|
value: [0.78021978 0.79787234 0.77894737 0.78947368 0.90217391 0.87912088
|
|
0.77419355 0.77659574 0.86813187 0.79787234]
|
|
|
|
mean value: 0.8144601467066597
|
|
|
|
MCC on Blind test: 0.57
|
|
|
|
Accuracy on Blind test: 0.86
|
|
|
|
Model_name: Logistic Regression
|
|
Model func: LogisticRegression(random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegression(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.04268003 0.04425907 0.09252167 0.19389081 0.17087245 0.11250997
|
|
0.11579156 0.05681252 0.05521941 0.06854796]
|
|
|
|
mean value: 0.09531054496765137
|
|
|
|
key: score_time
|
|
value: [0.01231527 0.01466823 0.02190828 0.01713824 0.01801777 0.01830268
|
|
0.02358699 0.01193404 0.01280117 0.02229571]
|
|
|
|
mean value: 0.017296838760375976
|
|
|
|
key: test_mcc
|
|
value: [0.72923266 0.86440678 0.88148255 0.72923266 0.72923266 0.78247589
|
|
0.62856487 0.79706825 0.79661017 0.75106762]
|
|
|
|
mean value: 0.7689374110044281
|
|
|
|
key: train_mcc
|
|
value: [0.83993808 0.82676545 0.82299887 0.83060275 0.85123769 0.84186165
|
|
0.80986322 0.84004534 0.83246405 0.82298136]
|
|
|
|
mean value: 0.831875845403522
|
|
|
|
key: test_accuracy
|
|
value: [0.86440678 0.93220339 0.94067797 0.86440678 0.86440678 0.88983051
|
|
0.81355932 0.89830508 0.89830508 0.87288136]
|
|
|
|
mean value: 0.8838983050847458
|
|
|
|
key: train_accuracy
|
|
value: [0.91996234 0.913371 0.91148776 0.91525424 0.92561205 0.92090395
|
|
0.90489642 0.91996234 0.91619586 0.91148776]
|
|
|
|
mean value: 0.9159133709981168
|
|
|
|
key: test_fscore
|
|
value: [0.86666667 0.93220339 0.94117647 0.86206897 0.86206897 0.89430894
|
|
0.80701754 0.89655172 0.89830508 0.88 ]
|
|
|
|
mean value: 0.8840367753952667
|
|
|
|
key: train_fscore
|
|
value: [0.92018779 0.91369606 0.91181989 0.91588785 0.9258216 0.92134831
|
|
0.9042654 0.92063492 0.91674462 0.91165414]
|
|
|
|
mean value: 0.9162060582170681
|
|
|
|
key: test_precision
|
|
value: [0.85245902 0.93220339 0.93333333 0.87719298 0.87719298 0.859375
|
|
0.83636364 0.9122807 0.89830508 0.83333333]
|
|
|
|
mean value: 0.8812039460666683
|
|
|
|
key: train_precision
|
|
value: [0.917603 0.91028037 0.90841121 0.90909091 0.92322097 0.91620112
|
|
0.91030534 0.91296296 0.91078067 0.90994371]
|
|
|
|
mean value: 0.9128800275673004
|
|
|
|
key: test_recall
|
|
value: [0.88135593 0.93220339 0.94915254 0.84745763 0.84745763 0.93220339
|
|
0.77966102 0.88135593 0.89830508 0.93220339]
|
|
|
|
mean value: 0.888135593220339
|
|
|
|
key: train_recall
|
|
value: [0.92278719 0.91713748 0.91525424 0.92278719 0.92843691 0.92655367
|
|
0.89830508 0.92843691 0.92278719 0.913371 ]
|
|
|
|
mean value: 0.9195856873822975
|
|
|
|
key: test_roc_auc
|
|
value: [0.86440678 0.93220339 0.94067797 0.86440678 0.86440678 0.88983051
|
|
0.81355932 0.89830508 0.89830508 0.87288136]
|
|
|
|
mean value: 0.8838983050847458
|
|
|
|
key: train_roc_auc
|
|
value: [0.91996234 0.913371 0.91148776 0.91525424 0.92561205 0.92090395
|
|
0.90489642 0.91996234 0.91619586 0.91148776]
|
|
|
|
mean value: 0.9159133709981168
|
|
|
|
key: test_jcc
|
|
value: [0.76470588 0.87301587 0.88888889 0.75757576 0.75757576 0.80882353
|
|
0.67647059 0.8125 0.81538462 0.78571429]
|
|
|
|
mean value: 0.7940655178155178
|
|
|
|
key: train_jcc
|
|
value: [0.85217391 0.84110535 0.83793103 0.84482759 0.86188811 0.85416667
|
|
0.82525952 0.85294118 0.8462867 0.83765112]
|
|
|
|
mean value: 0.8454231182222354
|
|
|
|
MCC on Blind test: 0.7
|
|
|
|
Accuracy on Blind test: 0.92
|
|
|
|
Model_name: Logistic RegressionCV
|
|
Model func: LogisticRegressionCV(random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegressionCV(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.53051233 1.36907029 1.52268577 1.22205734 1.56492877 1.36481237
|
|
1.33700252 2.12137532 2.19158459 1.81853867]
|
|
|
|
mean value: 1.604256796836853
|
|
|
|
key: score_time
|
|
value: [0.01503825 0.01568294 0.02346444 0.01674342 0.01266789 0.01989841
|
|
0.01630116 0.01646376 0.01633501 0.01584148]
|
|
|
|
mean value: 0.016843676567077637
|
|
|
|
key: test_mcc
|
|
value: [0.88762536 0.9029865 0.9003767 0.9003767 0.86891154 0.86640023
|
|
0.85749293 0.86640023 0.91855865 0.80076161]
|
|
|
|
mean value: 0.876989044944138
|
|
|
|
key: train_mcc
|
|
value: [0.95729852 0.94823835 0.94086488 0.94976074 0.95381419 0.96654057
|
|
0.95548024 0.95352963 0.95185308 0.94794193]
|
|
|
|
mean value: 0.9525322124690063
|
|
|
|
key: test_accuracy
|
|
value: [0.94067797 0.94915254 0.94915254 0.94915254 0.93220339 0.93220339
|
|
0.92372881 0.93220339 0.95762712 0.89830508]
|
|
|
|
mean value: 0.9364406779661016
|
|
|
|
key: train_accuracy
|
|
value: [0.97834275 0.97363465 0.96986817 0.97457627 0.97645951 0.98305085
|
|
0.97740113 0.97645951 0.97551789 0.97363465]
|
|
|
|
mean value: 0.975894538606403
|
|
|
|
key: test_fscore
|
|
value: [0.944 0.9516129 0.95081967 0.95081967 0.93548387 0.93442623
|
|
0.92913386 0.93442623 0.95934959 0.90322581]
|
|
|
|
mean value: 0.9393297835687502
|
|
|
|
key: train_fscore
|
|
value: [0.9787234 0.97421731 0.97058824 0.97502313 0.97695853 0.98330241
|
|
0.97781885 0.97687327 0.97601476 0.974122 ]
|
|
|
|
mean value: 0.9763641890657068
|
|
|
|
key: test_precision
|
|
value: [0.89393939 0.90769231 0.92063492 0.92063492 0.89230769 0.9047619
|
|
0.86764706 0.9047619 0.921875 0.86153846]
|
|
|
|
mean value: 0.8995793565095036
|
|
|
|
key: train_precision
|
|
value: [0.96181818 0.95315315 0.94793537 0.95818182 0.9566787 0.96892139
|
|
0.9600726 0.96 0.95660036 0.95644283]
|
|
|
|
mean value: 0.9579804399114892
|
|
|
|
key: test_recall
|
|
value: [1. 1. 0.98305085 0.98305085 0.98305085 0.96610169
|
|
1. 0.96610169 1. 0.94915254]
|
|
|
|
mean value: 0.9830508474576272
|
|
|
|
key: train_recall
|
|
value: [0.99623352 0.99623352 0.99435028 0.99246704 0.99811676 0.99811676
|
|
0.99623352 0.99435028 0.99623352 0.99246704]
|
|
|
|
mean value: 0.9954802259887006
|
|
|
|
key: test_roc_auc
|
|
value: [0.94067797 0.94915254 0.94915254 0.94915254 0.93220339 0.93220339
|
|
0.92372881 0.93220339 0.95762712 0.89830508]
|
|
|
|
mean value: 0.9364406779661018
|
|
|
|
key: train_roc_auc
|
|
value: [0.97834275 0.97363465 0.96986817 0.97457627 0.97645951 0.98305085
|
|
0.97740113 0.97645951 0.97551789 0.97363465]
|
|
|
|
mean value: 0.975894538606403
|
|
|
|
key: test_jcc
|
|
value: [0.89393939 0.90769231 0.90625 0.90625 0.87878788 0.87692308
|
|
0.86764706 0.87692308 0.921875 0.82352941]
|
|
|
|
mean value: 0.8859817204853969
|
|
|
|
key: train_jcc
|
|
value: [0.95833333 0.9497307 0.94285714 0.95126354 0.95495495 0.96715328
|
|
0.95660036 0.95479204 0.95315315 0.94954955]
|
|
|
|
mean value: 0.9538388061668628
|
|
|
|
MCC on Blind test: 0.67
|
|
|
|
Accuracy on Blind test: 0.92
|
|
|
|
Model_name: Gaussian NB
|
|
Model func: GaussianNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01925063 0.01500678 0.0184629 0.01388073 0.01853728 0.01608038
|
|
0.01470423 0.02137613 0.01897025 0.02235484]
|
|
|
|
mean value: 0.017862415313720702
|
|
|
|
key: score_time
|
|
value: [0.01378465 0.01395178 0.01073766 0.01013851 0.01564693 0.01131177
|
|
0.01113129 0.01369214 0.01284933 0.01564264]
|
|
|
|
mean value: 0.012888669967651367
|
|
|
|
key: test_mcc
|
|
value: [0.53032146 0.49265895 0.69651783 0.6303868 0.52663543 0.60192927
|
|
0.44854261 0.64490202 0.54879547 0.64639313]
|
|
|
|
mean value: 0.5767082974661936
|
|
|
|
key: train_mcc
|
|
value: [0.58651562 0.60286121 0.60049573 0.61300342 0.61524237 0.5918404
|
|
0.60692333 0.60210861 0.58851611 0.60165235]
|
|
|
|
mean value: 0.600915914604893
|
|
|
|
key: test_accuracy
|
|
value: [0.76271186 0.74576271 0.84745763 0.81355932 0.76271186 0.79661017
|
|
0.72033898 0.8220339 0.77118644 0.8220339 ]
|
|
|
|
mean value: 0.7864406779661017
|
|
|
|
key: train_accuracy
|
|
value: [0.79190207 0.79943503 0.79755179 0.80508475 0.80602637 0.78719397
|
|
0.80131827 0.79943503 0.79284369 0.79849341]
|
|
|
|
mean value: 0.7979284369114877
|
|
|
|
key: test_fscore
|
|
value: [0.74545455 0.73684211 0.84210526 0.80357143 0.75438596 0.77777778
|
|
0.69158879 0.8173913 0.75229358 0.81415929]
|
|
|
|
mean value: 0.773557004454869
|
|
|
|
key: train_fscore
|
|
value: [0.78140455 0.78721279 0.78304743 0.79525223 0.79563492 0.75802998
|
|
0.78878879 0.78848064 0.78217822 0.78514056]
|
|
|
|
mean value: 0.7845170093156546
|
|
|
|
key: test_precision
|
|
value: [0.80392157 0.76363636 0.87272727 0.8490566 0.78181818 0.85714286
|
|
0.77083333 0.83928571 0.82 0.85185185]
|
|
|
|
mean value: 0.821027374719661
|
|
|
|
key: train_precision
|
|
value: [0.82291667 0.83829787 0.84347826 0.8375 0.84067086 0.87841191
|
|
0.84188034 0.83403361 0.82463466 0.84086022]
|
|
|
|
mean value: 0.840268439599726
|
|
|
|
key: test_recall
|
|
value: [0.69491525 0.71186441 0.81355932 0.76271186 0.72881356 0.71186441
|
|
0.62711864 0.79661017 0.69491525 0.77966102]
|
|
|
|
mean value: 0.7322033898305085
|
|
|
|
key: train_recall
|
|
value: [0.74387947 0.74199623 0.7306968 0.75706215 0.75517891 0.66666667
|
|
0.74199623 0.74764595 0.74387947 0.73634652]
|
|
|
|
mean value: 0.7365348399246704
|
|
|
|
key: test_roc_auc
|
|
value: [0.76271186 0.74576271 0.84745763 0.81355932 0.76271186 0.79661017
|
|
0.72033898 0.8220339 0.77118644 0.8220339 ]
|
|
|
|
mean value: 0.7864406779661017
|
|
|
|
key: train_roc_auc
|
|
value: [0.79190207 0.79943503 0.79755179 0.80508475 0.80602637 0.78719397
|
|
0.80131827 0.79943503 0.79284369 0.79849341]
|
|
|
|
mean value: 0.7979284369114877
|
|
|
|
key: test_jcc
|
|
value: [0.5942029 0.58333333 0.72727273 0.67164179 0.6056338 0.63636364
|
|
0.52857143 0.69117647 0.60294118 0.68656716]
|
|
|
|
mean value: 0.6327704429191455
|
|
|
|
key: train_jcc
|
|
value: [0.64123377 0.6490939 0.64344942 0.66009852 0.66062603 0.61034483
|
|
0.65123967 0.65081967 0.64227642 0.64628099]
|
|
|
|
mean value: 0.6455463225710825
|
|
|
|
MCC on Blind test: 0.56
|
|
|
|
Accuracy on Blind test: 0.85
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01731157 0.01772642 0.01778531 0.01752996 0.01791835 0.02287078
|
|
0.02791548 0.01764393 0.01947856 0.01820731]
|
|
|
|
mean value: 0.019438767433166505
|
|
|
|
key: score_time
|
|
value: [0.01291466 0.0128932 0.01276016 0.01268792 0.01274586 0.01271653
|
|
0.0129838 0.01313806 0.01300859 0.01295424]
|
|
|
|
mean value: 0.012880301475524903
|
|
|
|
key: test_mcc
|
|
value: [0.44381268 0.72881356 0.59631184 0.52542373 0.5770176 0.56453575
|
|
0.37336433 0.61450987 0.47464445 0.45178123]
|
|
|
|
mean value: 0.5350215037451828
|
|
|
|
key: train_mcc
|
|
value: [0.5221909 0.5502582 0.51633818 0.57287155 0.57884874 0.53587577
|
|
0.59180008 0.55766134 0.57672237 0.56061191]
|
|
|
|
mean value: 0.5563179042103773
|
|
|
|
key: test_accuracy
|
|
value: [0.72033898 0.86440678 0.79661017 0.76271186 0.78813559 0.77966102
|
|
0.68644068 0.80508475 0.73728814 0.72033898]
|
|
|
|
mean value: 0.7661016949152543
|
|
|
|
key: train_accuracy
|
|
value: [0.76082863 0.77495292 0.75800377 0.78625235 0.78907721 0.76741996
|
|
0.79566855 0.7787194 0.78813559 0.77966102]
|
|
|
|
mean value: 0.7778719397363465
|
|
|
|
key: test_fscore
|
|
value: [0.736 0.86440678 0.80645161 0.76271186 0.7826087 0.79365079
|
|
0.67826087 0.816 0.73504274 0.7480916 ]
|
|
|
|
mean value: 0.7723224953935377
|
|
|
|
key: train_fscore
|
|
value: [0.76611418 0.77890842 0.76225717 0.79000925 0.79411765 0.77442922
|
|
0.79963066 0.7818013 0.79224377 0.78688525]
|
|
|
|
mean value: 0.7826396858102938
|
|
|
|
key: test_precision
|
|
value: [0.6969697 0.86440678 0.76923077 0.76271186 0.80357143 0.74626866
|
|
0.69642857 0.77272727 0.74137931 0.68055556]
|
|
|
|
mean value: 0.7534249905612337
|
|
|
|
key: train_precision
|
|
value: [0.74954955 0.76545455 0.74909091 0.77636364 0.77558348 0.75177305
|
|
0.78442029 0.77106227 0.77717391 0.76190476]
|
|
|
|
mean value: 0.7662376408913959
|
|
|
|
key: test_recall
|
|
value: [0.77966102 0.86440678 0.84745763 0.76271186 0.76271186 0.84745763
|
|
0.66101695 0.86440678 0.72881356 0.83050847]
|
|
|
|
mean value: 0.7949152542372881
|
|
|
|
key: train_recall
|
|
value: [0.7834275 0.79284369 0.77589454 0.80414313 0.81355932 0.79849341
|
|
0.81544256 0.79284369 0.8079096 0.81355932]
|
|
|
|
mean value: 0.7998116760828625
|
|
|
|
key: test_roc_auc
|
|
value: [0.72033898 0.86440678 0.79661017 0.76271186 0.78813559 0.77966102
|
|
0.68644068 0.80508475 0.73728814 0.72033898]
|
|
|
|
mean value: 0.7661016949152541
|
|
|
|
key: train_roc_auc
|
|
value: [0.76082863 0.77495292 0.75800377 0.78625235 0.78907721 0.76741996
|
|
0.79566855 0.7787194 0.78813559 0.77966102]
|
|
|
|
mean value: 0.7778719397363465
|
|
|
|
key: test_jcc
|
|
value: [0.58227848 0.76119403 0.67567568 0.61643836 0.64285714 0.65789474
|
|
0.51315789 0.68918919 0.58108108 0.59756098]
|
|
|
|
mean value: 0.631732756301958
|
|
|
|
key: train_jcc
|
|
value: [0.62089552 0.63787879 0.61584454 0.6529052 0.65853659 0.6318927
|
|
0.66615385 0.64176829 0.6559633 0.64864865]
|
|
|
|
mean value: 0.6430487426209308
|
|
|
|
MCC on Blind test: 0.41
|
|
|
|
Accuracy on Blind test: 0.77
|
|
|
|
Model_name: K-Nearest Neighbors
|
|
Model func: KNeighborsClassifier()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', KNeighborsClassifier())])
|
|
|
|
key: fit_time
|
|
value: [0.01591873 0.02098083 0.01996851 0.02011347 0.01982379 0.0167973
|
|
0.02340651 0.01341724 0.01726842 0.01364994]
|
|
|
|
mean value: 0.018134474754333496
|
|
|
|
key: score_time
|
|
value: [0.03239775 0.0361948 0.0501852 0.02540565 0.02645135 0.02349544
|
|
0.02616739 0.02158642 0.02533984 0.02187085]
|
|
|
|
mean value: 0.028909468650817872
|
|
|
|
key: test_mcc
|
|
value: [0.75907212 0.79357539 0.77899453 0.78889349 0.63632522 0.81934649
|
|
0.70099964 0.68675171 0.80076161 0.80403577]
|
|
|
|
mean value: 0.7568755970809651
|
|
|
|
key: train_mcc
|
|
value: [0.84037101 0.84295362 0.84629977 0.84629977 0.8399339 0.83701635
|
|
0.83961626 0.84675928 0.83950958 0.8548474 ]
|
|
|
|
mean value: 0.843360694874403
|
|
|
|
key: test_accuracy
|
|
value: [0.87288136 0.88983051 0.88135593 0.88983051 0.81355932 0.90677966
|
|
0.83898305 0.83050847 0.89830508 0.89830508]
|
|
|
|
mean value: 0.8720338983050847
|
|
|
|
key: train_accuracy
|
|
value: [0.91619586 0.91713748 0.91902072 0.91902072 0.91619586 0.91431262
|
|
0.91525424 0.91902072 0.91619586 0.92278719]
|
|
|
|
mean value: 0.9175141242937853
|
|
|
|
key: test_fscore
|
|
value: [0.88372093 0.89922481 0.89230769 0.8976378 0.828125 0.912
|
|
0.85714286 0.85074627 0.90322581 0.9047619 ]
|
|
|
|
mean value: 0.8828893061030483
|
|
|
|
key: train_fscore
|
|
value: [0.9215859 0.92267135 0.92429577 0.92429577 0.92144748 0.91996482
|
|
0.92105263 0.92442882 0.92130858 0.92807018]
|
|
|
|
mean value: 0.9229121315880182
|
|
|
|
key: test_precision
|
|
value: [0.81428571 0.82857143 0.81690141 0.83823529 0.76811594 0.86363636
|
|
0.77027027 0.76 0.86153846 0.85074627]
|
|
|
|
mean value: 0.8172301151556292
|
|
|
|
key: train_precision
|
|
value: [0.86589404 0.86490939 0.8677686 0.8677686 0.86710963 0.8630363
|
|
0.86206897 0.86655684 0.86833333 0.86863711]
|
|
|
|
mean value: 0.8662082804214208
|
|
|
|
key: test_recall
|
|
value: [0.96610169 0.98305085 0.98305085 0.96610169 0.89830508 0.96610169
|
|
0.96610169 0.96610169 0.94915254 0.96610169]
|
|
|
|
mean value: 0.9610169491525424
|
|
|
|
key: train_recall
|
|
value: [0.98493409 0.98870056 0.98870056 0.98870056 0.98305085 0.98493409
|
|
0.98870056 0.9905838 0.98116761 0.99623352]
|
|
|
|
mean value: 0.9875706214689266
|
|
|
|
key: test_roc_auc
|
|
value: [0.87288136 0.88983051 0.88135593 0.88983051 0.81355932 0.90677966
|
|
0.83898305 0.83050847 0.89830508 0.89830508]
|
|
|
|
mean value: 0.8720338983050848
|
|
|
|
key: train_roc_auc
|
|
value: [0.91619586 0.91713748 0.91902072 0.91902072 0.91619586 0.91431262
|
|
0.91525424 0.91902072 0.91619586 0.92278719]
|
|
|
|
mean value: 0.9175141242937852
|
|
|
|
key: test_jcc
|
|
value: [0.79166667 0.81690141 0.80555556 0.81428571 0.70666667 0.83823529
|
|
0.75 0.74025974 0.82352941 0.82608696]
|
|
|
|
mean value: 0.791318741428914
|
|
|
|
key: train_jcc
|
|
value: [0.85457516 0.85644372 0.85924714 0.85924714 0.85433715 0.85179153
|
|
0.85365854 0.85947712 0.85409836 0.86579378]
|
|
|
|
mean value: 0.8568669639762804
|
|
|
|
MCC on Blind test: 0.43
|
|
|
|
Accuracy on Blind test: 0.84
|
|
|
|
Model_name: SVM
|
|
Model func: SVC(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SVC(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.064255 0.05905032 0.05888295 0.05968857 0.08164549 0.06412482
|
|
0.05660295 0.05877948 0.05916452 0.05902314]
|
|
|
|
mean value: 0.06212172508239746
|
|
|
|
key: score_time
|
|
value: [0.02126884 0.02165389 0.02142501 0.02128029 0.02214193 0.02172971
|
|
0.02206707 0.02146077 0.02109408 0.02127147]
|
|
|
|
mean value: 0.021539306640625
|
|
|
|
key: test_mcc
|
|
value: [0.78067087 0.8136762 0.83242375 0.73049431 0.69651783 0.86490385
|
|
0.67884423 0.74672866 0.83098605 0.71692818]
|
|
|
|
mean value: 0.7692173927403629
|
|
|
|
key: train_mcc
|
|
value: [0.83053204 0.85007077 0.82874428 0.82880309 0.80982875 0.82302806
|
|
0.81544835 0.82683584 0.82676545 0.82874428]
|
|
|
|
mean value: 0.826880091339201
|
|
|
|
key: test_accuracy
|
|
value: [0.88983051 0.90677966 0.91525424 0.86440678 0.84745763 0.93220339
|
|
0.83898305 0.87288136 0.91525424 0.8559322 ]
|
|
|
|
mean value: 0.8838983050847458
|
|
|
|
key: train_accuracy
|
|
value: [0.91525424 0.92467043 0.91431262 0.91431262 0.90489642 0.91148776
|
|
0.90772128 0.913371 0.913371 0.91431262]
|
|
|
|
mean value: 0.9133709981167608
|
|
|
|
key: test_fscore
|
|
value: [0.89256198 0.90756303 0.91803279 0.86885246 0.84210526 0.93333333
|
|
0.83478261 0.87603306 0.91666667 0.864 ]
|
|
|
|
mean value: 0.8853931184287585
|
|
|
|
key: train_fscore
|
|
value: [0.91557223 0.92619926 0.91503268 0.91519105 0.90534208 0.91198502
|
|
0.90789474 0.91401869 0.91369606 0.91503268]
|
|
|
|
mean value: 0.9139964495032054
|
|
|
|
key: test_precision
|
|
value: [0.87096774 0.9 0.88888889 0.84126984 0.87272727 0.91803279
|
|
0.85714286 0.85483871 0.90163934 0.81818182]
|
|
|
|
mean value: 0.8723689260971123
|
|
|
|
key: train_precision
|
|
value: [0.91214953 0.90777577 0.90740741 0.90590406 0.9011194 0.90689013
|
|
0.90619137 0.90723562 0.91028037 0.90740741]
|
|
|
|
mean value: 0.9072361073398956
|
|
|
|
key: test_recall
|
|
value: [0.91525424 0.91525424 0.94915254 0.89830508 0.81355932 0.94915254
|
|
0.81355932 0.89830508 0.93220339 0.91525424]
|
|
|
|
mean value: 0.9
|
|
|
|
key: train_recall
|
|
value: [0.91902072 0.94538606 0.92278719 0.92467043 0.90960452 0.91713748
|
|
0.90960452 0.92090395 0.91713748 0.92278719]
|
|
|
|
mean value: 0.9209039548022598
|
|
|
|
key: test_roc_auc
|
|
value: [0.88983051 0.90677966 0.91525424 0.86440678 0.84745763 0.93220339
|
|
0.83898305 0.87288136 0.91525424 0.8559322 ]
|
|
|
|
mean value: 0.8838983050847458
|
|
|
|
key: train_roc_auc
|
|
value: [0.91525424 0.92467043 0.91431262 0.91431262 0.90489642 0.91148776
|
|
0.90772128 0.913371 0.913371 0.91431262]
|
|
|
|
mean value: 0.9133709981167608
|
|
|
|
key: test_jcc
|
|
value: [0.80597015 0.83076923 0.84848485 0.76811594 0.72727273 0.875
|
|
0.71641791 0.77941176 0.84615385 0.76056338]
|
|
|
|
mean value: 0.7958159799398703
|
|
|
|
key: train_jcc
|
|
value: [0.84429066 0.86254296 0.84337349 0.84364261 0.82705479 0.83820998
|
|
0.8313253 0.84165232 0.84110535 0.84337349]
|
|
|
|
mean value: 0.8416570968553982
|
|
|
|
MCC on Blind test: 0.63
|
|
|
|
Accuracy on Blind test: 0.89
|
|
|
|
Model_name: MLP
|
|
Model func: MLPClassifier(max_iter=500, random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MLPClassifier(max_iter=500, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [5.53531837 4.37649727 3.26275301 4.33462262 5.62743688 5.78908157
|
|
6.21798134 4.838871 5.34016895 5.69845986]
|
|
|
|
mean value: 5.102119088172913
|
|
|
|
key: score_time
|
|
value: [0.01279187 0.01542544 0.01298237 0.01497793 0.01516032 0.0188005
|
|
0.01325631 0.01504207 0.0152328 0.01609254]
|
|
|
|
mean value: 0.014976215362548829
|
|
|
|
key: test_mcc
|
|
value: [0.85749293 0.98319208 0.93435318 0.93435318 0.91855865 0.91855865
|
|
0.9029865 0.9003767 0.96665725 0.95038193]
|
|
|
|
mean value: 0.9266911056150037
|
|
|
|
key: train_mcc
|
|
value: [0.99811853 0.99811853 0.99811853 1. 0.99811853 1.
|
|
0.99811853 0.99623352 0.99811853 0.99811853]
|
|
|
|
mean value: 0.9983063237178031
|
|
|
|
key: test_accuracy
|
|
value: [0.92372881 0.99152542 0.96610169 0.96610169 0.95762712 0.95762712
|
|
0.94915254 0.94915254 0.98305085 0.97457627]
|
|
|
|
mean value: 0.961864406779661
|
|
|
|
key: train_accuracy
|
|
value: [0.99905838 0.99905838 0.99905838 1. 0.99905838 1.
|
|
0.99905838 0.99811676 0.99905838 0.99905838]
|
|
|
|
mean value: 0.9991525423728813
|
|
|
|
key: test_fscore
|
|
value: [0.92913386 0.99159664 0.96721311 0.96721311 0.95934959 0.95934959
|
|
0.9516129 0.95081967 0.98333333 0.97520661]
|
|
|
|
mean value: 0.9634828433683781
|
|
|
|
key: train_fscore
|
|
value: [0.99905927 0.99905927 0.99905927 1. 0.99905927 1.
|
|
0.99905927 0.99811676 0.99905927 0.99905927]
|
|
|
|
mean value: 0.9991531624422229
|
|
|
|
key: test_precision
|
|
value: [0.86764706 0.98333333 0.93650794 0.93650794 0.921875 0.921875
|
|
0.90769231 0.92063492 0.96721311 0.9516129 ]
|
|
|
|
mean value: 0.9314899511479868
|
|
|
|
key: train_precision
|
|
value: [0.9981203 0.9981203 0.9981203 1. 0.9981203 1.
|
|
0.9981203 0.99811676 0.9981203 0.9981203 ]
|
|
|
|
mean value: 0.9984958866091783
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 1. 1.
|
|
1. 0.98305085 1. 1. ]
|
|
|
|
mean value: 0.9983050847457627
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1.
|
|
1. 0.99811676 1. 1. ]
|
|
|
|
mean value: 0.9998116760828625
|
|
|
|
key: test_roc_auc
|
|
value: [0.92372881 0.99152542 0.96610169 0.96610169 0.95762712 0.95762712
|
|
0.94915254 0.94915254 0.98305085 0.97457627]
|
|
|
|
mean value: 0.961864406779661
|
|
|
|
key: train_roc_auc
|
|
value: [0.99905838 0.99905838 0.99905838 1. 0.99905838 1.
|
|
0.99905838 0.99811676 0.99905838 0.99905838]
|
|
|
|
mean value: 0.9991525423728813
|
|
|
|
key: test_jcc
|
|
value: [0.86764706 0.98333333 0.93650794 0.93650794 0.921875 0.921875
|
|
0.90769231 0.90625 0.96721311 0.9516129 ]
|
|
|
|
mean value: 0.9300514590844948
|
|
|
|
key: train_jcc
|
|
value: [0.9981203 0.9981203 0.9981203 1. 0.9981203 1. 0.9981203
|
|
0.9962406 0.9981203 0.9981203]
|
|
|
|
mean value: 0.9983082706766917
|
|
|
|
MCC on Blind test: 0.64
|
|
|
|
Accuracy on Blind test: 0.92
|
|
|
|
Model_name: Decision Tree
|
|
Model func: DecisionTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', DecisionTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.0692749 0.05011821 0.05140734 0.07988405 0.05059075 0.06561899
|
|
0.07502794 0.06454301 0.07872796 0.0528667 ]
|
|
|
|
mean value: 0.06380598545074463
|
|
|
|
key: score_time
|
|
value: [0.00942755 0.00956416 0.0098052 0.01367497 0.00988388 0.01063585
|
|
0.01280856 0.01017737 0.01284742 0.00961304]
|
|
|
|
mean value: 0.010843801498413085
|
|
|
|
key: test_mcc
|
|
value: [0.93435318 0.98319208 0.91643971 0.9029865 0.95038193 0.93435318
|
|
0.91855865 0.95038193 0.98319208 0.89882165]
|
|
|
|
mean value: 0.9372660897704187
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.96610169 0.99152542 0.95762712 0.94915254 0.97457627 0.96610169
|
|
0.95762712 0.97457627 0.99152542 0.94915254]
|
|
|
|
mean value: 0.9677966101694915
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.96721311 0.99159664 0.95867769 0.9516129 0.97520661 0.96721311
|
|
0.95934959 0.97520661 0.99159664 0.95 ]
|
|
|
|
mean value: 0.9687672912631772
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.93650794 0.98333333 0.93548387 0.90769231 0.9516129 0.93650794
|
|
0.921875 0.9516129 0.98333333 0.93442623]
|
|
|
|
mean value: 0.9442385754302399
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 0.98305085 1. 1. 1.
|
|
1. 1. 1. 0.96610169]
|
|
|
|
mean value: 0.9949152542372881
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.96610169 0.99152542 0.95762712 0.94915254 0.97457627 0.96610169
|
|
0.95762712 0.97457627 0.99152542 0.94915254]
|
|
|
|
mean value: 0.9677966101694916
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.93650794 0.98333333 0.92063492 0.90769231 0.9516129 0.93650794
|
|
0.921875 0.9516129 0.98333333 0.9047619 ]
|
|
|
|
mean value: 0.9397872479223286
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.59
|
|
|
|
Accuracy on Blind test: 0.91
|
|
|
|
Model_name: Extra Trees
|
|
Model func: ExtraTreesClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreesClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.20147061 0.21714115 0.20334792 0.1800189 0.1938014 0.16830301
|
|
0.16693687 0.20908928 0.18658495 0.17727923]
|
|
|
|
mean value: 0.19039733409881593
|
|
|
|
key: score_time
|
|
value: [0.03004575 0.02099943 0.02024198 0.0294168 0.02240396 0.01927185
|
|
0.02342463 0.02899504 0.02147627 0.01911068]
|
|
|
|
mean value: 0.023538637161254882
|
|
|
|
key: test_mcc
|
|
value: [1. 1. 0.96665725 0.98319208 1. 0.98319208
|
|
1. 1. 0.96665725 0.94928891]
|
|
|
|
mean value: 0.9848987555773493
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [1. 1. 0.98305085 0.99152542 1. 0.99152542
|
|
1. 1. 0.98305085 0.97457627]
|
|
|
|
mean value: 0.9923728813559323
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [1. 1. 0.98333333 0.99159664 1. 0.99159664
|
|
1. 1. 0.98333333 0.97435897]
|
|
|
|
mean value: 0.9924218918336565
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 1. 0.96721311 0.98333333 1. 0.98333333
|
|
1. 1. 0.96721311 0.98275862]
|
|
|
|
mean value: 0.9883851516864518
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 1. 1.
|
|
1. 1. 1. 0.96610169]
|
|
|
|
mean value: 0.9966101694915255
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [1. 1. 0.98305085 0.99152542 1. 0.99152542
|
|
1. 1. 0.98305085 0.97457627]
|
|
|
|
mean value: 0.9923728813559322
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [1. 1. 0.96721311 0.98333333 1. 0.98333333
|
|
1. 1. 0.96721311 0.95 ]
|
|
|
|
mean value: 0.9851092896174863
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.44
|
|
|
|
Accuracy on Blind test: 0.89
|
|
|
|
Model_name: Extra Tree
|
|
Model func: ExtraTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.0166502 0.01452446 0.01455283 0.01721382 0.01821232 0.01415896
|
|
0.01335478 0.01487994 0.01616764 0.02213693]
|
|
|
|
mean value: 0.016185188293457033
|
|
|
|
key: score_time
|
|
value: [0.01028776 0.00975513 0.01007414 0.01537156 0.00954986 0.01089168
|
|
0.0105772 0.01028156 0.01495886 0.01172209]
|
|
|
|
mean value: 0.011346983909606933
|
|
|
|
key: test_mcc
|
|
value: [0.9029865 0.9029865 0.85749293 0.87246434 0.88762536 0.85749293
|
|
0.84270097 0.91855865 0.95038193 0.8824975 ]
|
|
|
|
mean value: 0.8875187609903276
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.94915254 0.94915254 0.92372881 0.93220339 0.94067797 0.92372881
|
|
0.91525424 0.95762712 0.97457627 0.94067797]
|
|
|
|
mean value: 0.940677966101695
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.9516129 0.9516129 0.92913386 0.93650794 0.944 0.92913386
|
|
0.921875 0.95934959 0.97520661 0.94214876]
|
|
|
|
mean value: 0.9440581424891744
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.90769231 0.90769231 0.86764706 0.88059701 0.89393939 0.86764706
|
|
0.85507246 0.921875 0.9516129 0.91935484]
|
|
|
|
mean value: 0.8973130347600041
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 1. 1.
|
|
1. 1. 1. 0.96610169]
|
|
|
|
mean value: 0.9966101694915255
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.94915254 0.94915254 0.92372881 0.93220339 0.94067797 0.92372881
|
|
0.91525424 0.95762712 0.97457627 0.94067797]
|
|
|
|
mean value: 0.940677966101695
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.90769231 0.90769231 0.86764706 0.88059701 0.89393939 0.86764706
|
|
0.85507246 0.921875 0.9516129 0.890625 ]
|
|
|
|
mean value: 0.8944400508890363
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.48
|
|
|
|
Accuracy on Blind test: 0.88
|
|
|
|
Model_name: Random Forest
|
|
Model func: RandomForestClassifier(n_estimators=1000, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(n_estimators=1000, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [2.759094 2.94365668 2.82175159 2.61615205 2.6051569 2.68661141
|
|
2.59693551 2.64133501 2.74272108 2.55798745]
|
|
|
|
mean value: 2.697140169143677
|
|
|
|
key: score_time
|
|
value: [0.10589242 0.14642477 0.1048069 0.10735679 0.09872508 0.10027003
|
|
0.10456824 0.16720176 0.09953618 0.09945989]
|
|
|
|
mean value: 0.11342420578002929
|
|
|
|
key: test_mcc
|
|
value: [0.96665725 1. 0.96665725 0.96665725 1. 0.98319208
|
|
0.98319208 0.98319208 0.98319208 0.93220339]
|
|
|
|
mean value: 0.9764943446137222
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.98305085 1. 0.98305085 0.98305085 1. 0.99152542
|
|
0.99152542 0.99152542 0.99152542 0.96610169]
|
|
|
|
mean value: 0.9881355932203391
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.98333333 1. 0.98333333 0.98333333 1. 0.99159664
|
|
0.99159664 0.99159664 0.99159664 0.96610169]
|
|
|
|
mean value: 0.9882488249537102
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.96721311 1. 0.96721311 0.96721311 1. 0.98333333
|
|
0.98333333 0.98333333 0.98333333 0.96610169]
|
|
|
|
mean value: 0.9801074372510883
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 1. 1.
|
|
1. 1. 1. 0.96610169]
|
|
|
|
mean value: 0.9966101694915255
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.98305085 1. 0.98305085 0.98305085 1. 0.99152542
|
|
0.99152542 0.99152542 0.99152542 0.96610169]
|
|
|
|
mean value: 0.988135593220339
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.96721311 1. 0.96721311 0.96721311 1. 0.98333333
|
|
0.98333333 0.98333333 0.98333333 0.93442623]
|
|
|
|
mean value: 0.9769398907103825
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.59
|
|
|
|
Accuracy on Blind test: 0.91
|
|
|
|
Model_name: Random Forest2
|
|
Model func: RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000...05', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10,
|
|
oob_score=True, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.13536143 1.16674852 1.11894178 1.15462422 1.16094041 1.16159368
|
|
1.21700954 1.1570735 1.14313674 1.15245867]
|
|
|
|
mean value: 1.1567888498306274
|
|
|
|
key: score_time
|
|
value: [0.28587818 0.23341537 0.23972082 0.26531696 0.2762959 0.260113
|
|
0.12891078 0.27162743 0.2963872 0.24842334]
|
|
|
|
mean value: 0.25060889720916746
|
|
|
|
key: test_mcc
|
|
value: [0.95038193 0.98319208 0.95038193 0.93435318 0.98319208 0.94928891
|
|
0.96665725 0.96665725 0.98319208 0.91538573]
|
|
|
|
mean value: 0.9582682403460042
|
|
|
|
key: train_mcc
|
|
value: [0.98876369 0.98690308 0.9924952 0.98876369 0.98690308 0.98686108
|
|
0.98690308 0.98876369 0.98876369 0.98876369]
|
|
|
|
mean value: 0.9883883970521011
|
|
|
|
key: test_accuracy
|
|
value: [0.97457627 0.99152542 0.97457627 0.96610169 0.99152542 0.97457627
|
|
0.98305085 0.98305085 0.99152542 0.95762712]
|
|
|
|
mean value: 0.978813559322034
|
|
|
|
key: train_accuracy
|
|
value: [0.99435028 0.99340866 0.99623352 0.99435028 0.99340866 0.99340866
|
|
0.99340866 0.99435028 0.99435028 0.99435028]
|
|
|
|
mean value: 0.9941619585687382
|
|
|
|
key: test_fscore
|
|
value: [0.97520661 0.99159664 0.97520661 0.96721311 0.99159664 0.97435897
|
|
0.98333333 0.98333333 0.99159664 0.95798319]
|
|
|
|
mean value: 0.9791425088163932
|
|
|
|
key: train_fscore
|
|
value: [0.99438202 0.99345182 0.99624765 0.99438202 0.99345182 0.99343955
|
|
0.99345182 0.99438202 0.99438202 0.99438202]
|
|
|
|
mean value: 0.9941952789688489
|
|
|
|
key: test_precision
|
|
value: [0.9516129 0.98333333 0.9516129 0.93650794 0.98333333 0.98275862
|
|
0.96721311 0.96721311 0.98333333 0.95 ]
|
|
|
|
mean value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
0.9656918593157401
|
|
|
|
key: train_precision
|
|
value: [0.98882682 0.98698885 0.99252336 0.98882682 0.98698885 0.98880597
|
|
0.98698885 0.98882682 0.98882682 0.98882682]
|
|
|
|
mean value: 0.9886429955598455
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 1. 0.96610169
|
|
1. 1. 1. 0.96610169]
|
|
|
|
mean value: 0.9932203389830508
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 0.99811676
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9998116760828625
|
|
|
|
key: test_roc_auc
|
|
value: [0.97457627 0.99152542 0.97457627 0.96610169 0.99152542 0.97457627
|
|
0.98305085 0.98305085 0.99152542 0.95762712]
|
|
|
|
mean value: 0.978813559322034
|
|
|
|
key: train_roc_auc
|
|
value: [0.99435028 0.99340866 0.99623352 0.99435028 0.99340866 0.99340866
|
|
0.99340866 0.99435028 0.99435028 0.99435028]
|
|
|
|
mean value: 0.9941619585687382
|
|
|
|
key: test_jcc
|
|
value: [0.9516129 0.98333333 0.9516129 0.93650794 0.98333333 0.95
|
|
0.96721311 0.96721311 0.98333333 0.91935484]
|
|
|
|
mean value: 0.9593514811177424
|
|
|
|
key: train_jcc
|
|
value: [0.98882682 0.98698885 0.99252336 0.98882682 0.98698885 0.98696462
|
|
0.98698885 0.98882682 0.98882682 0.98882682]
|
|
|
|
mean value: 0.9884588603698736
|
|
|
|
MCC on Blind test: 0.66
|
|
|
|
Accuracy on Blind test: 0.92
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.0293107 0.0175004 0.01743841 0.01543427 0.01736736 0.01755309
|
|
0.01757669 0.01742125 0.01782084 0.01749325]
|
|
|
|
mean value: 0.018491625785827637
|
|
|
|
key: score_time
|
|
value: [0.01359296 0.01260734 0.01272488 0.012609 0.01274157 0.01259136
|
|
0.01261878 0.01260448 0.01255107 0.01265883]
|
|
|
|
mean value: 0.012730026245117187
|
|
|
|
key: test_mcc
|
|
value: [0.44381268 0.72881356 0.59631184 0.52542373 0.5770176 0.56453575
|
|
0.37336433 0.61450987 0.47464445 0.45178123]
|
|
|
|
mean value: 0.5350215037451828
|
|
|
|
key: train_mcc
|
|
value: [0.5221909 0.5502582 0.51633818 0.57287155 0.57884874 0.53587577
|
|
0.59180008 0.55766134 0.57672237 0.56061191]
|
|
|
|
mean value: 0.5563179042103773
|
|
|
|
key: test_accuracy
|
|
value: [0.72033898 0.86440678 0.79661017 0.76271186 0.78813559 0.77966102
|
|
0.68644068 0.80508475 0.73728814 0.72033898]
|
|
|
|
mean value: 0.7661016949152543
|
|
|
|
key: train_accuracy
|
|
value: [0.76082863 0.77495292 0.75800377 0.78625235 0.78907721 0.76741996
|
|
0.79566855 0.7787194 0.78813559 0.77966102]
|
|
|
|
mean value: 0.7778719397363465
|
|
|
|
key: test_fscore
|
|
value: [0.736 0.86440678 0.80645161 0.76271186 0.7826087 0.79365079
|
|
0.67826087 0.816 0.73504274 0.7480916 ]
|
|
|
|
mean value: 0.7723224953935377
|
|
|
|
key: train_fscore
|
|
value: [0.76611418 0.77890842 0.76225717 0.79000925 0.79411765 0.77442922
|
|
0.79963066 0.7818013 0.79224377 0.78688525]
|
|
|
|
mean value: 0.7826396858102938
|
|
|
|
key: test_precision
|
|
value: [0.6969697 0.86440678 0.76923077 0.76271186 0.80357143 0.74626866
|
|
0.69642857 0.77272727 0.74137931 0.68055556]
|
|
|
|
mean value: 0.7534249905612337
|
|
|
|
key: train_precision
|
|
value: [0.74954955 0.76545455 0.74909091 0.77636364 0.77558348 0.75177305
|
|
0.78442029 0.77106227 0.77717391 0.76190476]
|
|
|
|
mean value: 0.7662376408913959
|
|
|
|
key: test_recall
|
|
value: [0.77966102 0.86440678 0.84745763 0.76271186 0.76271186 0.84745763
|
|
0.66101695 0.86440678 0.72881356 0.83050847]
|
|
|
|
mean value: 0.7949152542372881
|
|
|
|
key: train_recall
|
|
value: [0.7834275 0.79284369 0.77589454 0.80414313 0.81355932 0.79849341
|
|
0.81544256 0.79284369 0.8079096 0.81355932]
|
|
|
|
mean value: 0.7998116760828625
|
|
|
|
key: test_roc_auc
|
|
value: [0.72033898 0.86440678 0.79661017 0.76271186 0.78813559 0.77966102
|
|
0.68644068 0.80508475 0.73728814 0.72033898]
|
|
|
|
mean value: 0.7661016949152541
|
|
|
|
key: train_roc_auc
|
|
value: [0.76082863 0.77495292 0.75800377 0.78625235 0.78907721 0.76741996
|
|
0.79566855 0.7787194 0.78813559 0.77966102]
|
|
|
|
mean value: 0.7778719397363465
|
|
|
|
key: test_jcc
|
|
value: [0.58227848 0.76119403 0.67567568 0.61643836 0.64285714 0.65789474
|
|
0.51315789 0.68918919 0.58108108 0.59756098]
|
|
|
|
mean value: 0.631732756301958
|
|
|
|
key: train_jcc
|
|
value: [0.62089552 0.63787879 0.61584454 0.6529052 0.65853659 0.6318927
|
|
0.66615385 0.64176829 0.6559633 0.64864865]
|
|
|
|
mean value: 0.6430487426209308
|
|
|
|
MCC on Blind test: 0.41
|
|
|
|
Accuracy on Blind test: 0.77
|
|
|
|
Model_name: XGBoost
|
|
Model func: XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000...
|
|
interaction_constraints=None, learning_rate=None,
|
|
max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan,
|
|
monotone_constraints=None, n_estimators=100,
|
|
n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None,
|
|
reg_lambda=None, scale_pos_weight=None,
|
|
subsample=None, tree_method=None,
|
|
use_label_encoder=False,
|
|
validate_parameters=None, verbosity=0))])
|
|
|
|
key: fit_time
|
|
value: [0.14282894 0.14083934 0.12548876 0.12557888 0.12832046 0.12443876
|
|
0.1517005 0.13677001 0.12723207 0.27929258]
|
|
|
|
mean value: 0.14824903011322021
|
|
|
|
key: score_time
|
|
value: [0.01261878 0.01212049 0.01157165 0.01150203 0.01128888 0.01137614
|
|
0.01133227 0.01153827 0.01134849 0.01248336]
|
|
|
|
mean value: 0.011718034744262695
|
|
|
|
key: test_mcc
|
|
value: [0.95038193 0.98319208 0.95038193 0.96665725 0.96665725 0.91855865
|
|
0.98319208 0.98319208 0.98319208 0.89882165]
|
|
|
|
mean value: 0.9584226966948862
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.97457627 0.99152542 0.97457627 0.98305085 0.98305085 0.95762712
|
|
0.99152542 0.99152542 0.99152542 0.94915254]
|
|
|
|
mean value: 0.978813559322034
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.97520661 0.99159664 0.97520661 0.98333333 0.98333333 0.95934959
|
|
0.99159664 0.99159664 0.99159664 0.95 ]
|
|
|
|
mean value: 0.9792816037924945
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.9516129 0.98333333 0.9516129 0.96721311 0.96721311 0.921875
|
|
0.98333333 0.98333333 0.98333333 0.93442623]
|
|
|
|
mean value: 0.9627286598801339
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 1. 1.
|
|
1. 1. 1. 0.96610169]
|
|
|
|
mean value: 0.9966101694915255
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.97457627 0.99152542 0.97457627 0.98305085 0.98305085 0.95762712
|
|
0.99152542 0.99152542 0.99152542 0.94915254]
|
|
|
|
mean value: 0.978813559322034
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.9516129 0.98333333 0.9516129 0.96721311 0.96721311 0.921875
|
|
0.98333333 0.98333333 0.98333333 0.9047619 ]
|
|
|
|
mean value: 0.9597622274055048
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.81
|
|
|
|
Accuracy on Blind test: 0.95
|
|
|
|
Model_name: LDA
|
|
Model func: LinearDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LinearDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.06022215 0.0741868 0.07381701 0.0659914 0.09116364 0.07465959
|
|
0.06501889 0.08443975 0.08815455 0.07589269]
|
|
|
|
mean value: 0.07535464763641357
|
|
|
|
key: score_time
|
|
value: [0.0192883 0.01250887 0.0124867 0.01942992 0.01940823 0.01253057
|
|
0.02062702 0.01257706 0.01924729 0.01531935]
|
|
|
|
mean value: 0.01634232997894287
|
|
|
|
key: test_mcc
|
|
value: [0.80403577 0.87246434 0.85348593 0.80830501 0.79661017 0.75106762
|
|
0.78889349 0.86440678 0.83098605 0.77390906]
|
|
|
|
mean value: 0.8144164216567024
|
|
|
|
key: train_mcc
|
|
value: [0.88644539 0.87352738 0.88807086 0.86891935 0.88414271 0.89530736
|
|
0.88595389 0.89758293 0.88414271 0.88958484]
|
|
|
|
mean value: 0.8853677416743689
|
|
|
|
key: test_accuracy
|
|
value: [0.89830508 0.93220339 0.92372881 0.89830508 0.89830508 0.87288136
|
|
0.88983051 0.93220339 0.91525424 0.88135593]
|
|
|
|
mean value: 0.9042372881355932
|
|
|
|
key: train_accuracy
|
|
value: [0.94256121 0.93596987 0.94350282 0.93408663 0.94161959 0.9472693
|
|
0.94256121 0.94821092 0.94161959 0.94444444]
|
|
|
|
mean value: 0.9421845574387947
|
|
|
|
key: test_fscore
|
|
value: [0.9047619 0.93650794 0.928 0.90625 0.89830508 0.88
|
|
0.8976378 0.93220339 0.91666667 0.890625 ]
|
|
|
|
mean value: 0.9090957777788369
|
|
|
|
key: train_fscore
|
|
value: [0.94408799 0.93784278 0.94485294 0.93542435 0.94290976 0.94833948
|
|
0.9437788 0.94949495 0.94290976 0.9455217 ]
|
|
|
|
mean value: 0.9435162521776751
|
|
|
|
key: test_precision
|
|
value: [0.85074627 0.88059701 0.87878788 0.84057971 0.89830508 0.83333333
|
|
0.83823529 0.93220339 0.90163934 0.82608696]
|
|
|
|
mean value: 0.8680514275326182
|
|
|
|
key: train_precision
|
|
value: [0.91964286 0.91119005 0.92280072 0.91681736 0.92252252 0.92947559
|
|
0.92418773 0.9265233 0.92252252 0.92753623]
|
|
|
|
mean value: 0.9223218876172361
|
|
|
|
key: test_recall
|
|
value: [0.96610169 1. 0.98305085 0.98305085 0.89830508 0.93220339
|
|
0.96610169 0.93220339 0.93220339 0.96610169]
|
|
|
|
mean value: 0.9559322033898305
|
|
|
|
key: train_recall
|
|
value: [0.96986817 0.96610169 0.96798493 0.95480226 0.96421846 0.96798493
|
|
0.96421846 0.97363465 0.96421846 0.96421846]
|
|
|
|
mean value: 0.9657250470809793
|
|
|
|
key: test_roc_auc
|
|
value: [0.89830508 0.93220339 0.92372881 0.89830508 0.89830508 0.87288136
|
|
0.88983051 0.93220339 0.91525424 0.88135593]
|
|
|
|
mean value: 0.9042372881355932
|
|
|
|
key: train_roc_auc
|
|
value: [0.94256121 0.93596987 0.94350282 0.93408663 0.94161959 0.9472693
|
|
0.94256121 0.94821092 0.94161959 0.94444444]
|
|
|
|
mean value: 0.9421845574387947
|
|
|
|
key: test_jcc
|
|
value: [0.82608696 0.88059701 0.86567164 0.82857143 0.81538462 0.78571429
|
|
0.81428571 0.87301587 0.84615385 0.8028169 ]
|
|
|
|
mean value: 0.8338298277772371
|
|
|
|
key: train_jcc
|
|
value: [0.89409722 0.88296041 0.89547038 0.87868284 0.89198606 0.90175439
|
|
0.89354276 0.90384615 0.89198606 0.8966725 ]
|
|
|
|
mean value: 0.8930998787908067
|
|
|
|
MCC on Blind test: 0.61
|
|
|
|
Accuracy on Blind test: 0.9
|
|
|
|
Model_name: Multinomial
|
|
Model func: MultinomialNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MultinomialNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01625371 0.01642179 0.01652074 0.01647019 0.01657486 0.01647496
|
|
0.01642179 0.01673937 0.01653385 0.01655269]
|
|
|
|
mean value: 0.0164963960647583
|
|
|
|
key: score_time
|
|
value: [0.01237965 0.01235509 0.01239276 0.01232767 0.01228762 0.01232171
|
|
0.01233506 0.012393 0.01233745 0.01234388]
|
|
|
|
mean value: 0.01234738826751709
|
|
|
|
key: test_mcc
|
|
value: [0.56061191 0.6486493 0.61450987 0.52663543 0.54879547 0.62711864
|
|
0.40824829 0.54307539 0.54307539 0.59356147]
|
|
|
|
mean value: 0.5614281160993403
|
|
|
|
key: train_mcc
|
|
value: [0.57571257 0.54974495 0.5627251 0.57835174 0.59052715 0.56229218
|
|
0.59347205 0.58507648 0.57259062 0.59081323]
|
|
|
|
mean value: 0.5761306055928781
|
|
|
|
key: test_accuracy
|
|
value: [0.77966102 0.8220339 0.80508475 0.76271186 0.77118644 0.81355932
|
|
0.70338983 0.77118644 0.77118644 0.79661017]
|
|
|
|
mean value: 0.7796610169491526
|
|
|
|
key: train_accuracy
|
|
value: [0.78719397 0.7740113 0.78060264 0.78813559 0.79472693 0.78060264
|
|
0.79566855 0.79190207 0.78531073 0.79472693]
|
|
|
|
mean value: 0.7872881355932203
|
|
|
|
key: test_fscore
|
|
value: [0.77192982 0.81081081 0.79279279 0.75438596 0.75229358 0.81355932
|
|
0.69026549 0.76521739 0.76521739 0.79310345]
|
|
|
|
mean value: 0.7709576010703059
|
|
|
|
key: train_fscore
|
|
value: [0.7797271 0.76470588 0.77223851 0.77876106 0.78834951 0.77356657
|
|
0.78662734 0.78481013 0.77603143 0.78752437]
|
|
|
|
mean value: 0.7792341900577046
|
|
|
|
key: test_precision
|
|
value: [0.8 0.86538462 0.84615385 0.78181818 0.82 0.81355932
|
|
0.72222222 0.78571429 0.78571429 0.80701754]
|
|
|
|
mean value: 0.8027584302900984
|
|
|
|
key: train_precision
|
|
value: [0.80808081 0.79754601 0.80284553 0.81481481 0.81362725 0.79919679
|
|
0.82304527 0.8125 0.8110883 0.81616162]
|
|
|
|
mean value: 0.8098906384617671
|
|
|
|
key: test_recall
|
|
value: [0.74576271 0.76271186 0.74576271 0.72881356 0.69491525 0.81355932
|
|
0.66101695 0.74576271 0.74576271 0.77966102]
|
|
|
|
mean value: 0.7423728813559323
|
|
|
|
key: train_recall
|
|
value: [0.75329567 0.73446328 0.74387947 0.74576271 0.7645951 0.74952919
|
|
0.75329567 0.75894539 0.74387947 0.76082863]
|
|
|
|
mean value: 0.7508474576271187
|
|
|
|
key: test_roc_auc
|
|
value: [0.77966102 0.8220339 0.80508475 0.76271186 0.77118644 0.81355932
|
|
0.70338983 0.77118644 0.77118644 0.79661017]
|
|
|
|
mean value: 0.7796610169491526
|
|
|
|
key: train_roc_auc
|
|
value: [0.78719397 0.7740113 0.78060264 0.78813559 0.79472693 0.78060264
|
|
0.79566855 0.79190207 0.78531073 0.79472693]
|
|
|
|
mean value: 0.7872881355932203
|
|
|
|
key: test_jcc
|
|
value: [0.62857143 0.68181818 0.65671642 0.6056338 0.60294118 0.68571429
|
|
0.52702703 0.61971831 0.61971831 0.65714286]
|
|
|
|
mean value: 0.6285001797190027
|
|
|
|
key: train_jcc
|
|
value: [0.63897764 0.61904762 0.62898089 0.63768116 0.65064103 0.63074485
|
|
0.64829822 0.64583333 0.63402889 0.64951768]
|
|
|
|
mean value: 0.6383751308913307
|
|
|
|
MCC on Blind test: 0.52
|
|
|
|
Accuracy on Blind test: 0.83
|
|
|
|
Model_name: Passive Aggresive
|
|
Model func: PassiveAggressiveClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
PassiveAggressiveClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.04364491 0.03171659 0.0358181 0.03312755 0.04538345 0.04394197
|
|
0.04156375 0.03292131 0.03057885 0.04242516]
|
|
|
|
mean value: 0.03811216354370117
|
|
|
|
key: score_time
|
|
value: [0.01118135 0.01254201 0.01271796 0.01249433 0.01252294 0.01240826
|
|
0.01241255 0.01240587 0.01242971 0.01239181]
|
|
|
|
mean value: 0.012350678443908691
|
|
|
|
key: test_mcc
|
|
value: [0.49562035 0.58532256 0.69410843 0.66658552 0.37896836 0.74672866
|
|
0.79844727 0.75459377 0.88148255 0.74339194]
|
|
|
|
mean value: 0.674524941374315
|
|
|
|
key: train_mcc
|
|
value: [0.66098947 0.54903133 0.64525111 0.66962913 0.49572835 0.84955783
|
|
0.86256178 0.82864339 0.85129808 0.79614456]
|
|
|
|
mean value: 0.720883503270397
|
|
|
|
key: test_accuracy
|
|
value: [0.72033898 0.76271186 0.83050847 0.81355932 0.65254237 0.87288136
|
|
0.89830508 0.87288136 0.94067797 0.8559322 ]
|
|
|
|
mean value: 0.8220338983050848
|
|
|
|
key: train_accuracy
|
|
value: [0.80885122 0.73728814 0.79472693 0.81544256 0.70056497 0.92467043
|
|
0.93126177 0.91148776 0.92561205 0.88794727]
|
|
|
|
mean value: 0.8437853107344633
|
|
|
|
key: test_fscore
|
|
value: [0.63736264 0.69565217 0.85294118 0.7755102 0.5060241 0.86956522
|
|
0.90163934 0.88188976 0.94117647 0.87407407]
|
|
|
|
mean value: 0.793583515830888
|
|
|
|
key: train_fscore
|
|
value: [0.7674685 0.6490566 0.82942097 0.77828054 0.576 0.92380952
|
|
0.93158388 0.91637011 0.92609916 0.89923793]
|
|
|
|
mean value: 0.8197327219107842
|
|
|
|
key: test_precision
|
|
value: [0.90625 0.96969697 0.75324675 0.97435897 0.875 0.89285714
|
|
0.87301587 0.82352941 0.93333333 0.77631579]
|
|
|
|
mean value: 0.8777604247747437
|
|
|
|
key: train_precision
|
|
value: [0.97953216 0.97727273 0.70950469 0.97450425 0.98630137 0.9344894
|
|
0.92723881 0.86846543 0.92007435 0.81692308]
|
|
|
|
mean value: 0.909430626062848
|
|
|
|
key: test_recall
|
|
value: [0.49152542 0.54237288 0.98305085 0.6440678 0.3559322 0.84745763
|
|
0.93220339 0.94915254 0.94915254 1. ]
|
|
|
|
mean value: 0.7694915254237288
|
|
|
|
key: train_recall
|
|
value: [0.63088512 0.48587571 0.99811676 0.64783427 0.40677966 0.913371
|
|
0.93596987 0.96986817 0.93220339 1. ]
|
|
|
|
mean value: 0.792090395480226
|
|
|
|
key: test_roc_auc
|
|
value: [0.72033898 0.76271186 0.83050847 0.81355932 0.65254237 0.87288136
|
|
0.89830508 0.87288136 0.94067797 0.8559322 ]
|
|
|
|
mean value: 0.8220338983050848
|
|
|
|
key: train_roc_auc
|
|
value: [0.80885122 0.73728814 0.79472693 0.81544256 0.70056497 0.92467043
|
|
0.93126177 0.91148776 0.92561205 0.88794727]
|
|
|
|
mean value: 0.8437853107344633
|
|
|
|
key: test_jcc
|
|
value: [0.46774194 0.53333333 0.74358974 0.63333333 0.33870968 0.76923077
|
|
0.82089552 0.78873239 0.88888889 0.77631579]
|
|
|
|
mean value: 0.6760771387507235
|
|
|
|
key: train_jcc
|
|
value: [0.62267658 0.48044693 0.70855615 0.63703704 0.40449438 0.85840708
|
|
0.87192982 0.8456486 0.86236934 0.81692308]
|
|
|
|
mean value: 0.7108488999470968
|
|
|
|
MCC on Blind test: 0.71
|
|
|
|
Accuracy on Blind test: 0.93
|
|
|
|
Model_name: Stochastic GDescent
|
|
Model func: SGDClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SGDClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.0334394 0.03457427 0.03350091 0.04584169 0.03807473 0.0404737
|
|
0.04320264 0.04615474 0.04610324 0.03607368]
|
|
|
|
mean value: 0.039743900299072266
|
|
|
|
key: score_time
|
|
value: [0.01591468 0.01249862 0.01360559 0.01247191 0.01977515 0.01233602
|
|
0.02015829 0.01549196 0.01238632 0.01881671]
|
|
|
|
mean value: 0.015345525741577149
|
|
|
|
key: test_mcc
|
|
value: [0.47203432 0.89882165 0.75024096 0.84757938 0.81361651 0.55849057
|
|
0.80403577 0.81934649 0.7710996 0.61631563]
|
|
|
|
mean value: 0.7351580880750913
|
|
|
|
key: train_mcc
|
|
value: [0.6488982 0.85875858 0.71213646 0.88339584 0.81234323 0.6711522
|
|
0.8618575 0.85806759 0.78052567 0.62108337]
|
|
|
|
mean value: 0.7708218654196989
|
|
|
|
key: test_accuracy
|
|
value: [0.71186441 0.94915254 0.86440678 0.92372881 0.89830508 0.76271186
|
|
0.89830508 0.90677966 0.87288136 0.79661017]
|
|
|
|
mean value: 0.8584745762711865
|
|
|
|
key: train_accuracy
|
|
value: [0.80320151 0.92937853 0.83709981 0.94161959 0.89830508 0.81638418
|
|
0.92937853 0.92843691 0.87947269 0.78719397]
|
|
|
|
mean value: 0.8750470809792844
|
|
|
|
key: test_fscore
|
|
value: [0.63043478 0.95 0.87878788 0.92307692 0.90769231 0.71428571
|
|
0.9047619 0.9009009 0.88721805 0.76470588]
|
|
|
|
mean value: 0.8461864339580047
|
|
|
|
key: train_fscore
|
|
value: [0.76059565 0.92931197 0.85969181 0.94106464 0.90737564 0.77966102
|
|
0.93224932 0.92649903 0.8920742 0.7372093 ]
|
|
|
|
mean value: 0.8665732581276413
|
|
|
|
key: test_precision
|
|
value: [0.87878788 0.93442623 0.79452055 0.93103448 0.83098592 0.8974359
|
|
0.85074627 0.96153846 0.7972973 0.90697674]
|
|
|
|
mean value: 0.8783749723607278
|
|
|
|
key: train_precision
|
|
value: [0.97076023 0.93018868 0.75498575 0.95009597 0.83307087 0.97457627
|
|
0.89583333 0.95228628 0.80763359 0.96352584]
|
|
|
|
mean value: 0.9032956814059184
|
|
|
|
key: test_recall
|
|
value: [0.49152542 0.96610169 0.98305085 0.91525424 1. 0.59322034
|
|
0.96610169 0.84745763 1. 0.66101695]
|
|
|
|
mean value: 0.8423728813559322
|
|
|
|
key: train_recall
|
|
value: [0.6252354 0.92843691 0.99811676 0.93220339 0.99623352 0.64971751
|
|
0.97175141 0.90207156 0.99623352 0.59698682]
|
|
|
|
mean value: 0.85969868173258
|
|
|
|
key: test_roc_auc
|
|
value: [0.71186441 0.94915254 0.86440678 0.92372881 0.89830508 0.76271186
|
|
0.89830508 0.90677966 0.87288136 0.79661017]
|
|
|
|
mean value: 0.8584745762711864
|
|
|
|
key: train_roc_auc
|
|
value: [0.80320151 0.92937853 0.83709981 0.94161959 0.89830508 0.81638418
|
|
0.92937853 0.92843691 0.87947269 0.78719397]
|
|
|
|
mean value: 0.8750470809792843
|
|
|
|
key: test_jcc
|
|
value: [0.46031746 0.9047619 0.78378378 0.85714286 0.83098592 0.55555556
|
|
0.82608696 0.81967213 0.7972973 0.61904762]
|
|
|
|
mean value: 0.7454651481068716
|
|
|
|
key: train_jcc
|
|
value: [0.61367837 0.86795775 0.75391181 0.88868941 0.83045526 0.63888889
|
|
0.87309645 0.86306306 0.80517504 0.58379374]
|
|
|
|
mean value: 0.7718709768166047
|
|
|
|
MCC on Blind test: 0.65
|
|
|
|
Accuracy on Blind test: 0.89
|
|
|
|
Model_name: AdaBoost Classifier
|
|
Model func: AdaBoostClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', AdaBoostClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.29463863 0.27934384 0.28409028 0.28360248 0.28656888 0.28917575
|
|
0.29048419 0.29290462 0.28766036 0.29452634]
|
|
|
|
mean value: 0.28829953670501707
|
|
|
|
key: score_time
|
|
value: [0.01592994 0.01695418 0.01719713 0.01697969 0.0162251 0.01613069
|
|
0.01775122 0.01807022 0.01747775 0.01749539]
|
|
|
|
mean value: 0.01702113151550293
|
|
|
|
key: test_mcc
|
|
value: [0.9029865 0.96610169 0.95038193 0.91643971 0.8824975 0.87246434
|
|
0.86891154 0.9003767 0.93220339 0.84855529]
|
|
|
|
mean value: 0.9040918602755158
|
|
|
|
key: train_mcc
|
|
value: [0.95717616 0.97380735 0.96643765 0.95548024 0.95497162 0.97011593
|
|
0.968124 0.96435698 0.95504617 0.9646034 ]
|
|
|
|
mean value: 0.9630119506438628
|
|
|
|
key: test_accuracy
|
|
value: [0.94915254 0.98305085 0.97457627 0.95762712 0.94067797 0.93220339
|
|
0.93220339 0.94915254 0.96610169 0.92372881]
|
|
|
|
mean value: 0.9508474576271186
|
|
|
|
key: train_accuracy
|
|
value: [0.97834275 0.98681733 0.98305085 0.97740113 0.97740113 0.98493409
|
|
0.98399247 0.98210923 0.97740113 0.98210923]
|
|
|
|
mean value: 0.9813559322033898
|
|
|
|
key: test_fscore
|
|
value: [0.9516129 0.98305085 0.97520661 0.95867769 0.94214876 0.93650794
|
|
0.93548387 0.95081967 0.96610169 0.92561983]
|
|
|
|
mean value: 0.9525229817767498
|
|
|
|
key: train_fscore
|
|
value: [0.97868397 0.9869403 0.98327138 0.97781885 0.97761194 0.98510242
|
|
0.98412698 0.98225957 0.97765363 0.9823584 ]
|
|
|
|
mean value: 0.9815827444615142
|
|
|
|
key: test_precision
|
|
value: [0.90769231 0.98305085 0.9516129 0.93548387 0.91935484 0.88059701
|
|
0.89230769 0.92063492 0.96610169 0.90322581]
|
|
|
|
mean value: 0.9260061897288013
|
|
|
|
key: train_precision
|
|
value: [0.96350365 0.97781885 0.9706422 0.9600726 0.96857671 0.97421731
|
|
0.97592593 0.97407407 0.96685083 0.96886447]
|
|
|
|
mean value: 0.9700546619349637
|
|
|
|
key: test_recall
|
|
value: [1. 0.98305085 1. 0.98305085 0.96610169 1.
|
|
0.98305085 0.98305085 0.96610169 0.94915254]
|
|
|
|
mean value: 0.9813559322033898
|
|
|
|
key: train_recall
|
|
value: [0.99435028 0.99623352 0.99623352 0.99623352 0.98681733 0.99623352
|
|
0.99246704 0.9905838 0.98870056 0.99623352]
|
|
|
|
mean value: 0.9934086629001884
|
|
|
|
key: test_roc_auc
|
|
value: [0.94915254 0.98305085 0.97457627 0.95762712 0.94067797 0.93220339
|
|
0.93220339 0.94915254 0.96610169 0.92372881]
|
|
|
|
mean value: 0.9508474576271186
|
|
|
|
key: train_roc_auc
|
|
value: [0.97834275 0.98681733 0.98305085 0.97740113 0.97740113 0.98493409
|
|
0.98399247 0.98210923 0.97740113 0.98210923]
|
|
|
|
mean value: 0.9813559322033898
|
|
|
|
key: test_jcc
|
|
value: [0.90769231 0.96666667 0.9516129 0.92063492 0.890625 0.88059701
|
|
0.87878788 0.90625 0.93442623 0.86153846]
|
|
|
|
mean value: 0.9098831382979612
|
|
|
|
key: train_jcc
|
|
value: [0.95825771 0.97421731 0.96709324 0.95660036 0.95620438 0.9706422
|
|
0.96875 0.96513761 0.95628415 0.96532847]
|
|
|
|
mean value: 0.9638515438212542
|
|
|
|
MCC on Blind test: 0.79
|
|
|
|
Accuracy on Blind test: 0.95
|
|
|
|
Model_name: Bagging Classifier
|
|
Model func: BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
BaggingClassifier(n_jobs=10, oob_score=True,
|
|
random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.22556996 0.14138031 0.22185707 0.22179008 0.22441602 0.2474339
|
|
0.24341297 0.24321699 0.24299645 0.24154115]
|
|
|
|
mean value: 0.22536149024963378
|
|
|
|
key: score_time
|
|
value: [0.03844738 0.03375101 0.0297327 0.02599072 0.02285314 0.04229784
|
|
0.04101825 0.04067183 0.04068828 0.039258 ]
|
|
|
|
mean value: 0.03547091484069824
|
|
|
|
key: test_mcc
|
|
value: [0.93435318 0.95038193 0.95038193 0.93435318 0.95038193 0.89882165
|
|
0.91855865 0.96665725 0.98319208 0.9029865 ]
|
|
|
|
mean value: 0.9390068274221124
|
|
|
|
key: train_mcc
|
|
value: [0.99811853 0.99624059 0.99811853 1. 0.99811853 0.99811853
|
|
0.99811853 1. 0.99624059 0.99624059]
|
|
|
|
mean value: 0.9979314418530464
|
|
|
|
key: test_accuracy
|
|
value: [0.96610169 0.97457627 0.97457627 0.96610169 0.97457627 0.94915254
|
|
0.95762712 0.98305085 0.99152542 0.94915254]
|
|
|
|
mean value: 0.9686440677966102
|
|
|
|
key: train_accuracy
|
|
value: [0.99905838 0.99811676 0.99905838 1. 0.99905838 0.99905838
|
|
0.99905838 1. 0.99811676 0.99811676]
|
|
|
|
mean value: 0.9989642184557439
|
|
|
|
key: test_fscore
|
|
value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
[0.96721311 0.97520661 0.97520661 0.96721311 0.97520661 0.95
|
|
0.95934959 0.98333333 0.99159664 0.9516129 ]
|
|
|
|
mean value: 0.9695938532929477
|
|
|
|
key: train_fscore
|
|
value: [0.99905927 0.9981203 0.99905927 1. 0.99905927 0.99905749
|
|
0.99905927 1. 0.9981203 0.9981203 ]
|
|
|
|
mean value: 0.9989655460097466
|
|
|
|
key: test_precision
|
|
value: [0.93650794 0.9516129 0.9516129 0.93650794 0.9516129 0.93442623
|
|
0.921875 0.96721311 0.98333333 0.90769231]
|
|
|
|
mean value: 0.9442394567981228
|
|
|
|
key: train_precision
|
|
value: [0.9981203 0.99624765 0.9981203 1. 0.9981203 1.
|
|
0.9981203 1. 0.99624765 0.99624765]
|
|
|
|
mean value: 0.998122416736024
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 1. 0.96610169
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9966101694915255
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 0.99811676
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9998116760828625
|
|
|
|
key: test_roc_auc
|
|
value: [0.96610169 0.97457627 0.97457627 0.96610169 0.97457627 0.94915254
|
|
0.95762712 0.98305085 0.99152542 0.94915254]
|
|
|
|
mean value: 0.9686440677966102
|
|
|
|
key: train_roc_auc
|
|
value: [0.99905838 0.99811676 0.99905838 1. 0.99905838 0.99905838
|
|
0.99905838 1. 0.99811676 0.99811676]
|
|
|
|
mean value: 0.9989642184557439
|
|
|
|
key: test_jcc
|
|
value: [0.93650794 0.9516129 0.9516129 0.93650794 0.9516129 0.9047619
|
|
0.921875 0.96721311 0.98333333 0.90769231]
|
|
|
|
mean value: 0.9412730243234937
|
|
|
|
key: train_jcc
|
|
value: [0.9981203 0.99624765 0.9981203 1. 0.9981203 0.99811676
|
|
0.9981203 1. 0.99624765 0.99624765]
|
|
|
|
mean value: 0.9979340928188865
|
|
|
|
MCC on Blind test: 0.7
|
|
|
|
Accuracy on Blind test: 0.93
|
|
|
|
Model_name: Gaussian Process
|
|
Model func: GaussianProcessClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianProcessClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.68125367 0.51527524 0.76813889 0.69306612 0.60466146 0.48474383
|
|
0.54582191 0.66710734 0.71118569 0.83804655]
|
|
|
|
mean value: 0.6509300708770752
|
|
|
|
key: score_time
|
|
value: [0.05217338 0.04851079 0.05079579 0.04748201 0.04738474 0.04535532
|
|
0.02606106 0.04651666 0.04927802 0.04628825]
|
|
|
|
mean value: 0.0459846019744873
|
|
|
|
key: test_mcc
|
|
value: [0.91855865 0.93435318 0.88453796 0.9003767 0.88762536 0.9029865
|
|
0.81934649 0.80830501 0.9029865 0.85051727]
|
|
|
|
mean value: 0.88095936415581
|
|
|
|
key: train_mcc
|
|
value: [0.98319208 0.98134164 0.98134164 0.97556114 0.98319208 0.98319208
|
|
0.97746353 0.97183586 0.97949456 0.98504589]
|
|
|
|
mean value: 0.9801660509876724
|
|
|
|
key: test_accuracy
|
|
value: [0.95762712 0.96610169 0.94067797 0.94915254 0.94067797 0.94915254
|
|
0.90677966 0.89830508 0.94915254 0.92372881]
|
|
|
|
mean value: 0.938135593220339
|
|
|
|
key: train_accuracy
|
|
value: [0.99152542 0.9905838 0.9905838 0.98775895 0.99152542 0.99152542
|
|
0.98870056 0.98587571 0.98964218 0.99246704]
|
|
|
|
mean value: 0.9900188323917137
|
|
|
|
key: test_fscore
|
|
value: [0.95934959 0.96721311 0.94308943 0.95081967 0.944 0.9516129
|
|
0.912 0.90625 0.9516129 0.92682927]
|
|
|
|
mean value: 0.9412776886019786
|
|
|
|
key: train_fscore
|
|
value: [0.99159664 0.99067164 0.99067164 0.98781631 0.99159664 0.99159664
|
|
0.98876404 0.98596819 0.98974837 0.99252336]
|
|
|
|
mean value: 0.9900953480015297
|
|
|
|
key: test_precision
|
|
value: [0.921875 0.93650794 0.90625 0.92063492 0.89393939 0.90769231
|
|
0.86363636 0.84057971 0.90769231 0.890625 ]
|
|
|
|
mean value: 0.8989432940248158
|
|
|
|
key: train_precision
|
|
value: [0.98333333 0.98151571 0.98151571 0.98320896 0.98333333 0.98333333
|
|
0.98324022 0.9795539 0.9797048 0.9851577 ]
|
|
|
|
mean value: 0.982389700181488
|
|
|
|
key: test_recall
|
|
value: [1. 1. 0.98305085 0.98305085 1. 1.
|
|
0.96610169 0.98305085 1. 0.96610169]
|
|
|
|
mean value: 0.988135593220339
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 0.99246704 1. 1.
|
|
0.99435028 0.99246704 1. 1. ]
|
|
|
|
mean value: 0.9979284369114878
|
|
|
|
key: test_roc_auc
|
|
value: [0.95762712 0.96610169 0.94067797 0.94915254 0.94067797 0.94915254
|
|
0.90677966 0.89830508 0.94915254 0.92372881]
|
|
|
|
mean value: 0.938135593220339
|
|
|
|
key: train_roc_auc
|
|
value: [0.99152542 0.9905838 0.9905838 0.98775895 0.99152542 0.99152542
|
|
0.98870056 0.98587571 0.98964218 0.99246704]
|
|
|
|
mean value: 0.9900188323917137
|
|
|
|
key: test_jcc
|
|
value: [0.921875 0.93650794 0.89230769 0.90625 0.89393939 0.90769231
|
|
0.83823529 0.82857143 0.90769231 0.86363636]
|
|
|
|
mean value: 0.8896707724465077
|
|
|
|
key: train_jcc
|
|
value: [0.98333333 0.98151571 0.98151571 0.97592593 0.98333333 0.98333333
|
|
0.97777778 0.97232472 0.9797048 0.9851577 ]
|
|
|
|
mean value: 0.9803922346732523
|
|
|
|
MCC on Blind test: 0.47
|
|
|
|
Accuracy on Blind test: 0.87
|
|
|
|
Model_name: Gradient Boosting
|
|
Model func: GradientBoostingClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GradientBoostingClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.22794056 1.22089958 1.22299123 1.22203779 1.2261126 1.24144316
|
|
1.22168493 1.22731614 1.23788023 1.2318294 ]
|
|
|
|
mean value: 1.228013563156128
|
|
|
|
key: score_time
|
|
value: [0.01018167 0.00976276 0.00977564 0.00982285 0.00960684 0.01047516
|
|
0.00944114 0.00993896 0.00983858 0.00990987]
|
|
|
|
mean value: 0.009875345230102538
|
|
|
|
key: test_mcc
|
|
value: [0.93435318 0.95038193 0.95038193 0.93435318 0.95038193 0.93435318
|
|
0.93435318 0.96665725 0.98319208 0.86640023]
|
|
|
|
mean value: 0.9404808067874346
|
|
|
|
key: train_mcc
|
|
value: [0.99436615 0.99062772 0.99624059 0.9924952 0.9924952 0.99436615
|
|
0.99062772 0.99436615 0.98876369 0.99436615]
|
|
|
|
mean value: 0.9928714736885274
|
|
|
|
key: test_accuracy
|
|
value: [0.96610169 0.97457627 0.97457627 0.96610169 0.97457627 0.96610169
|
|
0.96610169 0.98305085 0.99152542 0.93220339]
|
|
|
|
mean value: 0.9694915254237289
|
|
|
|
key: train_accuracy
|
|
value: [0.99717514 0.9952919 0.99811676 0.99623352 0.99623352 0.99717514
|
|
0.9952919 0.99717514 0.99435028 0.99717514]
|
|
|
|
mean value: 0.996421845574388
|
|
|
|
key: test_fscore
|
|
value: [0.96721311 0.97520661 0.97520661 0.96721311 0.97520661 0.96721311
|
|
0.96721311 0.98333333 0.99159664 0.93442623]
|
|
|
|
mean value: 0.9703828495224129
|
|
|
|
key: train_fscore
|
|
value: [0.9971831 0.99531396 0.9981203 0.99624765 0.99624765 0.9971831
|
|
0.99531396 0.9971831 0.99438202 0.9971831 ]
|
|
|
|
mean value: 0.9964357955930726
|
|
|
|
key: test_precision
|
|
value: [0.93650794 0.9516129 0.9516129 0.93650794 0.9516129 0.93650794
|
|
0.93650794 0.96721311 0.98333333 0.9047619 ]
|
|
|
|
mean value: 0.9456178808558502
|
|
|
|
key: train_precision
|
|
value: [0.99438202 0.99067164 0.99624765 0.99252336 0.99252336 0.99438202
|
|
0.99067164 0.99438202 0.98882682 0.99438202]
|
|
|
|
mean value: 0.992899257286839
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 1. 1.
|
|
1. 1. 1. 0.96610169]
|
|
|
|
mean value: 0.9966101694915255
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.96610169 0.97457627 0.97457627 0.96610169 0.97457627 0.96610169
|
|
0.96610169 0.98305085 0.99152542 0.93220339]
|
|
|
|
mean value: 0.9694915254237289
|
|
|
|
key: train_roc_auc
|
|
value: [0.99717514 0.9952919 0.99811676 0.99623352 0.99623352 0.99717514
|
|
0.9952919 0.99717514 0.99435028 0.99717514]
|
|
|
|
mean value: 0.996421845574388
|
|
|
|
key: test_jcc
|
|
value: [0.93650794 0.9516129 0.9516129 0.93650794 0.9516129 0.93650794
|
|
0.93650794 0.96721311 0.98333333 0.87692308]
|
|
|
|
mean value: 0.9428339980719674
|
|
|
|
key: train_jcc
|
|
value: [0.99438202 0.99067164 0.99624765 0.99252336 0.99252336 0.99438202
|
|
0.99067164 0.99438202 0.98882682 0.99438202]
|
|
|
|
mean value: 0.992899257286839
|
|
|
|
MCC on Blind test: 0.72
|
|
|
|
Accuracy on Blind test: 0.94
|
|
|
|
Model_name: QDA
|
|
Model func: QuadraticDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', QuadraticDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.06453919 0.0539906 0.05044985 0.05624151 0.04869103 0.05595088
|
|
0.04633474 0.04755855 0.12119699 0.12829089]
|
|
|
|
mean value: 0.06732442378997802
|
|
|
|
key: score_time
|
|
value: [0.02067423 0.0136342 0.01909781 0.01354551 0.01486897 0.01680827
|
|
0.01569867 0.01601911 0.0256753 0.02190661]
|
|
|
|
mean value: 0.017792868614196777
|
|
|
|
key: test_mcc
|
|
value: [1. 0.98319208 0.96610169 0.98319208 0.93435318 1.
|
|
1. 0.96665725 1. 0.96665725]
|
|
|
|
mean value: 0.9800153529921927
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [1. 0.99152542 0.98305085 0.99152542 0.96610169 1.
|
|
1. 0.98305085 1. 0.98305085]
|
|
|
|
mean value: 0.9898305084745763
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [1. 0.99159664 0.98305085 0.99159664 0.96721311 1.
|
|
1. 0.98333333 1. 0.98275862]
|
|
|
|
mean value: 0.9899549193545638
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 0.98333333 0.98305085 0.98333333 0.93650794 1.
|
|
1. 0.96721311 1. 1. ]
|
|
|
|
mean value: 0.9853438565386329
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 0.98305085 1. 1. 1.
|
|
1. 1. 1. 0.96610169]
|
|
|
|
mean value: 0.9949152542372881
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [1. 0.99152542 0.98305085 0.99152542 0.96610169 1.
|
|
1. 0.98305085 1. 0.98305085]
|
|
|
|
mean value: 0.9898305084745763
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [1. 0.98333333 0.96666667 0.98333333 0.93650794 1.
|
|
1. 0.96721311 1. 0.96610169]
|
|
|
|
mean value: 0.9803156079510622
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.0
|
|
|
|
Accuracy on Blind test: 0.86
|
|
|
|
Model_name: Ridge Classifier
|
|
Model func: RidgeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.04428339 0.04107308 0.05163312 0.06838155 0.05414343 0.04579186
|
|
0.02591658 0.04098225 0.04767823 0.07250929]
|
|
|
|
mean value: 0.049239277839660645
|
|
|
|
key: score_time
|
|
value: [0.02386093 0.04954052 0.03481007 0.02806735 0.03021669 0.0302124
|
|
0.01362944 0.01320767 0.03253913 0.02740669]
|
|
|
|
mean value: 0.028349089622497558
|
|
|
|
key: test_mcc
|
|
value: [0.74672866 0.91538573 0.89882165 0.84855529 0.74586985 0.76315046
|
|
0.72923266 0.76315046 0.83242375 0.80403577]
|
|
|
|
mean value: 0.8047354266106794
|
|
|
|
key: train_mcc
|
|
value: [0.85901454 0.84963622 0.85930733 0.85524693 0.86673432 0.84567037
|
|
0.82863846 0.84753127 0.84183179 0.87962869]
|
|
|
|
mean value: 0.8533239926419909
|
|
|
|
key: test_accuracy
|
|
value: [0.87288136 0.95762712 0.94915254 0.92372881 0.87288136 0.88135593
|
|
0.86440678 0.88135593 0.91525424 0.89830508]
|
|
|
|
mean value: 0.9016949152542373
|
|
|
|
key: train_accuracy
|
|
value: [0.92937853 0.92467043 0.92937853 0.92749529 0.93314501 0.92278719
|
|
0.91431262 0.92372881 0.92090395 0.93973635]
|
|
|
|
mean value: 0.9265536723163842
|
|
|
|
key: test_fscore
|
|
value: [0.87603306 0.95798319 0.95 0.92561983 0.87179487 0.88333333
|
|
0.86666667 0.87931034 0.9122807 0.9047619 ]
|
|
|
|
mean value: 0.9027783908978043
|
|
|
|
key: train_fscore
|
|
value: [0.93023256 0.92565056 0.9306198 0.92837209 0.93419833 0.92336449
|
|
0.91455399 0.92422825 0.92120075 0.94029851]
|
|
|
|
mean value: 0.9272719322281995
|
|
|
|
key: test_precision
|
|
value: [0.85483871 0.95 0.93442623 0.90322581 0.87931034 0.86885246
|
|
0.85245902 0.89473684 0.94545455 0.85074627]
|
|
|
|
mean value: 0.8934050222091177
|
|
|
|
key: train_precision
|
|
value: [0.91911765 0.91376147 0.91454545 0.91727941 0.91970803 0.91651206
|
|
0.91198502 0.91821561 0.91775701 0.93160813]
|
|
|
|
mean value: 0.9180489844367337
|
|
|
|
key: test_recall
|
|
value: [0.89830508 0.96610169 0.96610169 0.94915254 0.86440678 0.89830508
|
|
0.88135593 0.86440678 0.88135593 0.96610169]
|
|
|
|
mean value: 0.9135593220338983
|
|
|
|
key: train_recall
|
|
value: /home/tanu/git/LSHTM_analysis/scripts/ml/./embb_cd_8020.py:196: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
rouC_CT.sort_values(by = ['test_mcc'], ascending = False, inplace = True)
|
|
/home/tanu/git/LSHTM_analysis/scripts/ml/./embb_cd_8020.py:199: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
rouC_BT.sort_values(by = ['bts_mcc'], ascending = False, inplace = True)
|
|
[0.94161959 0.93785311 0.9472693 0.93973635 0.94915254 0.93032015
|
|
0.91713748 0.93032015 0.92467043 0.94915254]
|
|
|
|
mean value: 0.9367231638418079
|
|
|
|
key: test_roc_auc
|
|
value: [0.87288136 0.95762712 0.94915254 0.92372881 0.87288136 0.88135593
|
|
0.86440678 0.88135593 0.91525424 0.89830508]
|
|
|
|
mean value: 0.9016949152542373
|
|
|
|
key: train_roc_auc
|
|
value: [0.92937853 0.92467043 0.92937853 0.92749529 0.93314501 0.92278719
|
|
0.91431262 0.92372881 0.92090395 0.93973635]
|
|
|
|
mean value: 0.9265536723163842
|
|
|
|
key: test_jcc
|
|
value: [0.77941176 0.91935484 0.9047619 0.86153846 0.77272727 0.79104478
|
|
0.76470588 0.78461538 0.83870968 0.82608696]
|
|
|
|
mean value: 0.8242956919472022
|
|
|
|
key: train_jcc
|
|
value: [0.86956522 0.8615917 0.87024221 0.86631944 0.87652174 0.85763889
|
|
0.84256055 0.85913043 0.85391304 0.88732394]
|
|
|
|
mean value: 0.8644807175445733
|
|
|
|
MCC on Blind test: 0.75
|
|
|
|
Accuracy on Blind test: 0.94
|
|
|
|
Model_name: Ridge ClassifierCV
|
|
Model func: RidgeClassifierCV(cv=10)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifierCV(cv=10))])
|
|
|
|
key: fit_time
|
|
value: [0.39048243 0.534688 0.57431149 0.35954332 0.48602343 0.39265013
|
|
0.53873754 0.33329892 0.42959142 0.38816333]
|
|
|
|
mean value: 0.44274899959564207
|
|
|
|
key: score_time
|
|
value: [0.01955986 0.01982021 0.02021503 0.01955557 0.01964808 0.01954484
|
|
0.01968265 0.01957822 0.01400471 0.01961684]
|
|
|
|
mean value: 0.019122600555419922
|
|
|
|
key: test_mcc
|
|
value: [0.76668665 0.91538573 0.88453796 0.83825006 0.74586985 0.7484552
|
|
0.80403577 0.8136762 0.83098605 0.80830501]
|
|
|
|
mean value: 0.8156188487777721
|
|
|
|
key: train_mcc
|
|
value: [0.87803955 0.84963622 0.88807086 0.8707377 0.86673432 0.87216248
|
|
0.89184186 0.88192068 0.86270867 0.89530736]
|
|
|
|
mean value: 0.8757159699049394
|
|
|
|
key: test_accuracy
|
|
value: [0.88135593 0.95762712 0.94067797 0.91525424 0.87288136 0.87288136
|
|
0.89830508 0.90677966 0.91525424 0.89830508]
|
|
|
|
mean value: 0.9059322033898305
|
|
|
|
key: train_accuracy
|
|
value: [0.93879473 0.92467043 0.94350282 0.93502825 0.93314501 0.93596987
|
|
0.94538606 0.94067797 0.93126177 0.9472693 ]
|
|
|
|
mean value: 0.9375706214689266
|
|
|
|
key: test_fscore
|
|
value: [0.88709677 0.95798319 0.94308943 0.92063492 0.87179487 0.87804878
|
|
0.9047619 0.90598291 0.9137931 0.90625 ]
|
|
|
|
mean value: 0.9089435885475852
|
|
|
|
key: train_fscore
|
|
value: [0.93975904 0.92565056 0.94485294 0.93628809 0.93419833 0.93668529
|
|
0.94669118 0.94172063 0.93196645 0.94833948]
|
|
|
|
mean value: 0.9386151982134245
|
|
|
|
key: test_precision
|
|
value: [0.84615385 0.95 0.90625 0.86567164 0.87931034 0.84375
|
|
0.85074627 0.9137931 0.92982456 0.84057971]
|
|
|
|
mean value: 0.8826079476425905
|
|
|
|
key: train_precision
|
|
value: [0.92518248 0.91376147 0.92280072 0.91847826 0.91970803 0.92633517
|
|
0.92459605 0.92545455 0.92250923 0.92947559]
|
|
|
|
mean value: 0.9228301541314725
|
|
|
|
key: test_recall
|
|
value: [0.93220339 0.96610169 0.98305085 0.98305085 0.86440678 0.91525424
|
|
0.96610169 0.89830508 0.89830508 0.98305085]
|
|
|
|
mean value: 0.9389830508474577
|
|
|
|
key: train_recall
|
|
value: [0.95480226 0.93785311 0.96798493 0.95480226 0.94915254 0.9472693
|
|
0.96986817 0.95856874 0.94161959 0.96798493]
|
|
|
|
mean value: 0.9549905838041431
|
|
|
|
key: test_roc_auc
|
|
value: [0.88135593 0.95762712 0.94067797 0.91525424 0.87288136 0.87288136
|
|
0.89830508 0.90677966 0.91525424 0.89830508]
|
|
|
|
mean value: 0.9059322033898305
|
|
|
|
key: train_roc_auc
|
|
value: [0.93879473 0.92467043 0.94350282 0.93502825 0.93314501 0.93596987
|
|
0.94538606 0.94067797 0.93126177 0.9472693 ]
|
|
|
|
mean value: 0.9375706214689266
|
|
|
|
key: test_jcc
|
|
value: [0.79710145 0.91935484 0.89230769 0.85294118 0.77272727 0.7826087
|
|
0.82608696 0.828125 0.84126984 0.82857143]
|
|
|
|
mean value: 0.8341094351505776
|
|
|
|
key: train_jcc
|
|
value: [0.88636364 0.8615917 0.89547038 0.88020833 0.87652174 0.88091068
|
|
0.89877836 0.88986014 0.87260035 0.90175439]
|
|
|
|
mean value: 0.884405970499319
|
|
|
|
MCC on Blind test: 0.73
|
|
|
|
Accuracy on Blind test: 0.93
|