19453 lines
947 KiB
Text
19453 lines
947 KiB
Text
/home/tanu/git/LSHTM_analysis/scripts/ml/ml_data_7030.py:548: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
mask_check.sort_values(by = ['ligand_distance'], ascending = True, inplace = True)
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/xgboost/compat.py:36: FutureWarning: pandas.Int64Index is deprecated and will be removed from pandas in a future version. Use pandas.Index with the appropriate dtype instead.
|
|
from pandas import MultiIndex, Int64Index
|
|
1.22.4
|
|
1.4.1
|
|
|
|
aaindex_df contains non-numerical data
|
|
|
|
Total no. of non-numerial columns: 2
|
|
|
|
Selecting numerical data only
|
|
|
|
PASS: successfully selected numerical columns only for aaindex_df
|
|
|
|
Now checking for NA in the remaining aaindex_cols
|
|
|
|
Counting aaindex_df cols with NA
|
|
ncols with NA: 4 columns
|
|
Dropping these...
|
|
Original ncols: 127
|
|
|
|
Revised df ncols: 123
|
|
|
|
Checking NA in revised df...
|
|
|
|
PASS: cols with NA successfully dropped from aaindex_df
|
|
Proceeding with combining aa_df with other features_df
|
|
|
|
PASS: ncols match
|
|
Expected ncols: 123
|
|
Got: 123
|
|
|
|
Total no. of columns in clean aa_df: 123
|
|
|
|
Proceeding to merge, expected nrows in merged_df: 817
|
|
|
|
PASS: my_features_df and aa_df successfully combined
|
|
nrows: 817
|
|
ncols: 269
|
|
count of NULL values before imputation
|
|
|
|
or_mychisq 244
|
|
log10_or_mychisq 244
|
|
dtype: int64
|
|
count of NULL values AFTER imputation
|
|
|
|
mutationinformation 0
|
|
or_rawI 0
|
|
logorI 0
|
|
dtype: int64
|
|
|
|
PASS: OR values imputed, data ready for ML
|
|
|
|
Total no. of features for aaindex: 123
|
|
|
|
No. of numerical features: 168
|
|
No. of categorical features: 7
|
|
|
|
PASS: x_features has no target variable
|
|
|
|
No. of columns for x_features: 175
|
|
|
|
-------------------------------------------------------------
|
|
Successfully split data with stratification: 70/30
|
|
Input features data size: (467, 175)
|
|
Train data size: (312, 175)
|
|
Test data size: (155, 175)
|
|
y_train numbers: Counter({1: 206, 0: 106})
|
|
y_train ratio: 0.5145631067961165
|
|
|
|
y_test_numbers: Counter({1: 103, 0: 52})
|
|
y_test ratio: 0.5048543689320388
|
|
-------------------------------------------------------------
|
|
|
|
index: 0
|
|
ind: 1
|
|
|
|
Mask count check: True
|
|
|
|
index: 1
|
|
ind: 2
|
|
|
|
Mask count check: True
|
|
Original Data
|
|
Counter({1: 206, 0: 106}) Data dim: (312, 175)
|
|
|
|
Simple Random OverSampling
|
|
Counter({1: 206, 0: 206})
|
|
(412, 175)
|
|
|
|
Simple Random UnderSampling
|
|
Counter({0: 106, 1: 106})
|
|
(212, 175)
|
|
|
|
Simple Combined Over and UnderSampling
|
|
Counter({0: 206, 1: 206})
|
|
(412, 175)
|
|
|
|
SMOTE_NC OverSampling
|
|
Counter({1: 206, 0: 206})
|
|
(412, 175)
|
|
|
|
#####################################################################
|
|
|
|
Running ML analysis: 70/30 split
|
|
Gene name: katG
|
|
Drug name: isoniazid
|
|
|
|
Output directory: /home/tanu/git/Data/isoniazid/output/ml/tts_7030/
|
|
|
|
Sanity checks:
|
|
Total input features: 175
|
|
|
|
Training data size: (312, 175)
|
|
Test data size: (155, 175)
|
|
|
|
Target feature numbers (training data): Counter({1: 206, 0: 106})
|
|
Target features ratio (training data: 0.5145631067961165
|
|
|
|
Target feature numbers (test data): Counter({1: 103, 0: 52})
|
|
Target features ratio (test data): 0.5048543689320388
|
|
|
|
#####################################################################
|
|
|
|
|
|
================================================================
|
|
|
|
Strucutral features (n): 36
|
|
These are:
|
|
Common stablity features: ['ligand_distance', 'ligand_affinity_change', 'duet_stability_change', 'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'mcsm_ppi2_affinity', 'interface_dist']
|
|
FoldX columns: ['electro_rr', 'electro_mm', 'electro_sm', 'electro_ss', 'disulfide_rr', 'disulfide_mm', 'disulfide_sm', 'disulfide_ss', 'hbonds_rr', 'hbonds_mm', 'hbonds_sm', 'hbonds_ss', 'partcov_rr', 'partcov_mm', 'partcov_sm', 'partcov_ss', 'vdwclashes_rr', 'vdwclashes_mm', 'vdwclashes_sm', 'vdwclashes_ss', 'volumetric_rr', 'volumetric_mm', 'volumetric_ss']
|
|
Other struc columns: ['rsa', 'kd_values', 'rd_values']
|
|
================================================================
|
|
|
|
AAindex features (n): 123
|
|
These are:
|
|
['ALTS910101', 'AZAE970101', 'AZAE970102', 'BASU010101', 'BENS940101', 'BENS940102', 'BENS940103', 'BENS940104', 'BETM990101', 'BLAJ010101', 'BONM030101', 'BONM030102', 'BONM030103', 'BONM030104', 'BONM030105', 'BONM030106', 'BRYS930101', 'CROG050101', 'CSEM940101', 'DAYM780301', 'DAYM780302', 'DOSZ010101', 'DOSZ010102', 'DOSZ010103', 'DOSZ010104', 'FEND850101', 'FITW660101', 'GEOD900101', 'GIAG010101', 'GONG920101', 'GRAR740104', 'HENS920101', 'HENS920102', 'HENS920103', 'HENS920104', 'JOHM930101', 'JOND920103', 'JOND940101', 'KANM000101', 'KAPO950101', 'KESO980101', 'KESO980102', 'KOLA920101', 'KOLA930101', 'KOSJ950100_RSA_SST', 'KOSJ950100_SST', 'KOSJ950110_RSA', 'KOSJ950115', 'LEVJ860101', 'LINK010101', 'LIWA970101', 'LUTR910101', 'LUTR910102', 'LUTR910103', 'LUTR910104', 'LUTR910105', 'LUTR910106', 'LUTR910107', 'LUTR910108', 'LUTR910109', 'MCLA710101', 'MCLA720101', 'MEHP950102', 'MICC010101', 'MIRL960101', 'MIYS850102', 'MIYS850103', 'MIYS930101', 'MIYS960101', 'MIYS960102', 'MIYS960103', 'MIYS990106', 'MIYS990107', 'MIYT790101', 'MOHR870101', 'MOOG990101', 'MUET010101', 'MUET020101', 'MUET020102', 'NAOD960101', 'NGPC000101', 'NIEK910101', 'NIEK910102', 'OGAK980101', 'OVEJ920100_RSA', 'OVEJ920101', 'OVEJ920102', 'OVEJ920103', 'PRLA000101', 'PRLA000102', 'QUIB020101', 'QU_C930101', 'QU_C930102', 'QU_C930103', 'RIER950101', 'RISJ880101', 'RUSR970101', 'RUSR970102', 'RUSR970103', 'SIMK990101', 'SIMK990102', 'SIMK990103', 'SIMK990104', 'SIMK990105', 'SKOJ000101', 'SKOJ000102', 'SKOJ970101', 'TANS760101', 'TANS760102', 'THOP960101', 'TOBD000101', 'TOBD000102', 'TUDE900101', 'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101', 'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106']
|
|
================================================================
|
|
|
|
Evolutionary features (n): 3
|
|
These are:
|
|
['consurf_score', 'snap2_score', 'provean_score']
|
|
================================================================
|
|
|
|
Genomic features (n): 6
|
|
These are:
|
|
['maf', 'logorI']
|
|
['lineage_proportion', 'dist_lineage_proportion', 'lineage_count_all', 'lineage_count_unique']
|
|
================================================================
|
|
|
|
Categorical features (n): 7
|
|
These are:
|
|
['ss_class', 'aa_prop_change', 'electrostatics_change', 'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site']
|
|
================================================================
|
|
|
|
|
|
Pass: No. of features match
|
|
|
|
#####################################################################
|
|
|
|
|
|
Model_name: Logistic Regression
|
|
Model func: LogisticRegression(random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegression(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.03233504 0.0333972 0.05516076 0.03526402 0.05350041 0.03630829
|
|
0.0339694 0.04734373 0.03495312 0.03409076]
|
|
|
|
mean value: 0.03963227272033691
|
|
|
|
key: score_time
|
|
value: [0.01238108 0.01217294 0.01510072 0.01515746 0.0121963 0.01514292
|
|
0.01524949 0.01520634 0.01533437 0.01805663]
|
|
|
|
mean value: 0.014599823951721191
|
|
|
|
key: test_mcc
|
|
value: [0.79844727 0.8643122 0.85465477 0.78262379 0.61758068 0.69695062
|
|
0.78625916 0.41684569 0.71818182 0.64116449]
|
|
|
|
mean value: 0.717702048077937
|
|
|
|
key: train_mcc
|
|
value: [0.81579012 0.85703608 0.86517173 0.84883567 0.81654681 0.84252828
|
|
0.856474 0.8816558 0.83375042 0.87296384]
|
|
|
|
mean value: 0.8490752749523592
|
|
|
|
key: test_accuracy
|
|
value: [0.90625 0.9375 0.93548387 0.90322581 0.83870968 0.87096774
|
|
0.90322581 0.74193548 0.87096774 0.83870968]
|
|
|
|
mean value: 0.8746975806451613
|
|
|
|
key: train_accuracy
|
|
value: [0.91785714 0.93571429 0.93950178 0.93238434 0.91814947 0.92882562
|
|
0.93594306 0.94661922 0.9252669 0.9430605 ]
|
|
|
|
mean value: 0.9323322318251144
|
|
|
|
key: test_fscore
|
|
value: [0.92682927 0.95454545 0.95454545 0.93333333 0.88888889 0.90909091
|
|
0.92682927 0.80952381 0.9 0.88372093]
|
|
|
|
mean value: 0.9087307316745774
|
|
|
|
key: train_fscore
|
|
value: [0.94025974 0.953125 0.95538058 0.95013123 0.93994778 0.94818653
|
|
0.953125 0.96103896 0.94601542 0.95833333]
|
|
|
|
mean value: 0.9505543578996442
|
|
|
|
key: test_precision
|
|
value: [0.95 0.91304348 0.91304348 0.875 0.83333333 0.86956522
|
|
0.9047619 0.77272727 0.9 0.82608696]
|
|
|
|
mean value: 0.8757561641257293
|
|
|
|
key: train_precision
|
|
value: [0.905 0.91959799 0.92857143 0.92346939 0.90909091 0.91044776
|
|
0.92424242 0.92964824 0.90640394 0.92929293]
|
|
|
|
mean value: 0.9185765012189302
|
|
|
|
key: test_recall
|
|
value: [0.9047619 1. 1. 1. 0.95238095 0.95238095
|
|
0.95 0.85 0.9 0.95 ]
|
|
|
|
mean value: 0.9459523809523809
|
|
|
|
key: train_recall
|
|
value: [0.97837838 0.98918919 0.98378378 0.97837838 0.97297297 0.98918919
|
|
0.98387097 0.99462366 0.98924731 0.98924731]
|
|
|
|
mean value: 0.984888113920372
|
|
|
|
key: test_roc_auc
|
|
value: [0.90692641 0.90909091 0.9 0.85 0.77619048 0.82619048
|
|
0.88409091 0.69772727 0.85909091 0.79318182]
|
|
|
|
mean value: 0.8402489177489177
|
|
|
|
key: train_roc_auc
|
|
value: [0.88918919 0.91038407 0.91897523 0.91106419 0.89273649 0.90084459
|
|
0.91298812 0.92362762 0.89462366 0.92093945]
|
|
|
|
mean value: 0.907537258714572
|
|
|
|
key: test_jcc
|
|
value: [0.86363636 0.91304348 0.91304348 0.875 0.8 0.83333333
|
|
0.86363636 0.68 0.81818182 0.79166667]
|
|
|
|
mean value: 0.8351541501976285
|
|
|
|
key: train_jcc
|
|
value: [0.8872549 0.91044776 0.91457286 0.905 0.88669951 0.90147783
|
|
0.91044776 0.925 0.89756098 0.92 ]
|
|
|
|
mean value: 0.9058461604181686
|
|
|
|
MCC on Blind test: 0.75
|
|
|
|
Accuracy on Blind test: 0.89
|
|
|
|
Model_name: Logistic RegressionCV
|
|
Model func: LogisticRegressionCV(random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegressionCV(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.05972672 0.93337655 0.85979128 1.0101614 0.9706459 1.00876999
|
|
0.84041023 0.88820314 0.87697887 0.80039692]
|
|
|
|
mean value: 0.92484610080719
|
|
|
|
key: score_time
|
|
value: [0.02311707 0.0123024 0.01528835 0.0165627 0.01583934 0.01552248
|
|
0.01548195 0.01584959 0.01543498 0.01588607]
|
|
|
|
mean value: 0.01612849235534668
|
|
|
|
key: test_mcc
|
|
value: [0.93435318 0.8643122 0.85238095 0.69695062 1. 0.86831345
|
|
0.78625916 0.68174942 0.93048421 0.71390814]
|
|
|
|
mean value: 0.8328711336410171
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.96875 0.9375 0.93548387 0.87096774 1. 0.93548387
|
|
0.90322581 0.83870968 0.96774194 0.87096774]
|
|
|
|
mean value: 0.922883064516129
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.97560976 0.95454545 0.95238095 0.90909091 1. 0.95
|
|
0.92682927 0.86486486 0.97560976 0.9047619 ]
|
|
|
|
mean value: 0.941369286613189
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 0.91304348 0.95238095 0.86956522 1. 1.
|
|
0.9047619 0.94117647 0.95238095 0.86363636]
|
|
|
|
mean value: 0.9396945339400582
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.95238095 1. 0.95238095 0.95238095 1. 0.9047619
|
|
0.95 0.8 1. 0.95 ]
|
|
|
|
mean value: 0.9461904761904761
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.97619048 0.90909091 0.92619048 0.82619048 1. 0.95238095
|
|
0.88409091 0.85454545 0.95454545 0.83863636]
|
|
|
|
mean value: 0.9121861471861472
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.95238095 0.91304348 0.90909091 0.83333333 1. 0.9047619
|
|
0.86363636 0.76190476 0.95238095 0.82608696]
|
|
|
|
mean value: 0.8916619612271786
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.8
|
|
|
|
Accuracy on Blind test: 0.91
|
|
|
|
Model_name: Gaussian NB
|
|
Model func: GaussianNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01394224 0.01148248 0.01090741 0.01061201 0.0105617 0.01047516
|
|
0.0093956 0.0100913 0.009547 0.00971317]
|
|
|
|
mean value: 0.010672807693481445
|
|
|
|
key: score_time
|
|
value: [0.01587558 0.01017666 0.00976372 0.00977969 0.00974298 0.00903916
|
|
0.00903893 0.00935555 0.00905657 0.00896811]
|
|
|
|
mean value: 0.010079693794250489
|
|
|
|
key: test_mcc
|
|
value: [0.47306844 0.47306844 0.77484502 0.26190476 0.85465477 0.62281846
|
|
0.51793973 0.46277515 0.64116449 0.41684569]
|
|
|
|
mean value: 0.5499084954077021
|
|
|
|
key: train_mcc
|
|
value: [0.53347571 0.54579802 0.6239648 0.58476019 0.5592603 0.59365116
|
|
0.57062818 0.57127446 0.6024335 0.59689593]
|
|
|
|
mean value: 0.5782142268403786
|
|
|
|
key: test_accuracy
|
|
value: [0.75 0.75 0.90322581 0.67741935 0.93548387 0.83870968
|
|
0.77419355 0.74193548 0.83870968 0.74193548]
|
|
|
|
mean value: 0.7951612903225806
|
|
|
|
key: train_accuracy
|
|
value: [0.79285714 0.78214286 0.83274021 0.81494662 0.80071174 0.81850534
|
|
0.80782918 0.80427046 0.82206406 0.82206406]
|
|
|
|
mean value: 0.8098131672597865
|
|
|
|
key: test_fscore
|
|
value: [0.8 0.8 0.93023256 0.76190476 0.95454545 0.88372093
|
|
0.82051282 0.78947368 0.88372093 0.80952381]
|
|
|
|
mean value: 0.8433634949302024
|
|
|
|
key: train_fscore
|
|
value: [0.84491979 0.8252149 0.87466667 0.86096257 0.84782609 0.86327078
|
|
0.85483871 0.84931507 0.8655914 0.86772487]
|
|
|
|
mean value: 0.8554330827502625
|
|
|
|
key: test_precision
|
|
value: [0.84210526 0.84210526 0.90909091 0.76190476 0.91304348 0.86363636
|
|
0.84210526 0.83333333 0.82608696 0.77272727]
|
|
|
|
mean value: 0.8406138864948933
|
|
|
|
key: train_precision
|
|
value: [0.83597884 0.87804878 0.86315789 0.85185185 0.85245902 0.85638298
|
|
0.85483871 0.86592179 0.8655914 0.85416667]
|
|
|
|
mean value: 0.8578397920075227
|
|
|
|
key: test_recall
|
|
value: [0.76190476 0.76190476 0.95238095 0.76190476 1. 0.9047619
|
|
0.8 0.75 0.95 0.85 ]
|
|
|
|
mean value: 0.8492857142857143
|
|
|
|
key: train_recall
|
|
value: [0.85405405 0.77837838 0.88648649 0.87027027 0.84324324 0.87027027
|
|
0.85483871 0.83333333 0.8655914 0.88172043]
|
|
|
|
mean value: 0.8538186573670445
|
|
|
|
key: test_roc_auc
|
|
value: [0.74458874 0.74458874 0.87619048 0.63095238 0.9 0.80238095
|
|
0.76363636 0.73863636 0.79318182 0.69772727]
|
|
|
|
mean value: 0.7691883116883117
|
|
|
|
key: train_roc_auc
|
|
value: [0.76386913 0.78392603 0.80782658 0.7893018 0.78099662 0.79451014
|
|
0.78531409 0.79035088 0.80121675 0.79349179]
|
|
|
|
mean value: 0.7890803813151012
|
|
|
|
key: test_jcc
|
|
value: [0.66666667 0.66666667 0.86956522 0.61538462 0.91304348 0.79166667
|
|
0.69565217 0.65217391 0.79166667 0.68 ]
|
|
|
|
mean value: 0.7342486064659978
|
|
|
|
key: train_jcc
|
|
value: [0.73148148 0.70243902 0.77725118 0.75586854 0.73584906 0.75943396
|
|
0.74647887 0.73809524 0.76303318 0.76635514]
|
|
|
|
mean value: 0.7476285681051753
|
|
|
|
MCC on Blind test: 0.46
|
|
|
|
Accuracy on Blind test: 0.75
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.00983787 0.00968504 0.0097971 0.00964427 0.00997996 0.00982451
|
|
0.01080513 0.01073122 0.01083469 0.01091099]
|
|
|
|
mean value: 0.010205078125
|
|
|
|
key: score_time
|
|
value: [0.00884271 0.00899935 0.00897145 0.0091238 0.00884986 0.00881553
|
|
0.00971413 0.00975108 0.00984645 0.00914001]
|
|
|
|
mean value: 0.009205436706542969
|
|
|
|
key: test_mcc
|
|
value: [0.58441558 0.41281273 0.55714286 0.78262379 0.69695062 0.55714286
|
|
0.64203411 0.48992888 0.56537691 0.40572206]
|
|
|
|
mean value: 0.5694150392239126
|
|
|
|
key: train_mcc
|
|
value: [0.63966715 0.64841162 0.63494589 0.63445555 0.63494589 0.65151226
|
|
0.64020793 0.67456536 0.64134835 0.66575682]
|
|
|
|
mean value: 0.6465816821661099
|
|
|
|
key: test_accuracy
|
|
value: [0.8125 0.75 0.80645161 0.90322581 0.87096774 0.80645161
|
|
0.83870968 0.77419355 0.80645161 0.74193548]
|
|
|
|
mean value: 0.8110887096774193
|
|
|
|
key: train_accuracy
|
|
value: [0.84285714 0.84642857 0.83985765 0.83985765 0.83985765 0.84697509
|
|
0.84341637 0.85765125 0.84341637 0.85409253]
|
|
|
|
mean value: 0.8454410269445857
|
|
|
|
key: test_fscore
|
|
value: [0.85714286 0.82608696 0.85714286 0.93333333 0.90909091 0.85714286
|
|
0.87804878 0.84444444 0.85714286 0.81818182]
|
|
|
|
mean value: 0.8637757670631477
|
|
|
|
key: train_fscore
|
|
value: [0.88717949 0.88888889 0.88311688 0.88372093 0.88311688 0.88831169
|
|
0.8877551 0.89637306 0.88601036 0.89460154]
|
|
|
|
mean value: 0.8879074824992776
|
|
|
|
key: test_precision
|
|
value: [0.85714286 0.76 0.85714286 0.875 0.86956522 0.85714286
|
|
0.85714286 0.76 0.81818182 0.75 ]
|
|
|
|
mean value: 0.8261318464144551
|
|
|
|
key: train_precision
|
|
value: [0.84390244 0.85148515 0.85 0.84653465 0.85 0.855
|
|
0.84466019 0.865 0.855 0.85714286]
|
|
|
|
mean value: 0.8518725292322202
|
|
|
|
key: test_recall
|
|
value: [0.85714286 0.9047619 0.85714286 1. 0.95238095 0.85714286
|
|
0.9 0.95 0.9 0.9 ]
|
|
|
|
mean value: 0.9078571428571428
|
|
|
|
key: train_recall
|
|
value: [0.93513514 0.92972973 0.91891892 0.92432432 0.91891892 0.92432432
|
|
0.93548387 0.93010753 0.91935484 0.93548387]
|
|
|
|
mean value: 0.9271781458878233
|
|
|
|
key: test_roc_auc
|
|
value: [0.79220779 0.67965368 0.77857143 0.85 0.82619048 0.77857143
|
|
0.81363636 0.70227273 0.76818182 0.67727273]
|
|
|
|
mean value: 0.7666558441558441
|
|
|
|
key: train_roc_auc
|
|
value: [0.79914651 0.80697013 0.80320946 0.80070383 0.80320946 0.8111205
|
|
0.79932088 0.8229485 0.80704584 0.81511036]
|
|
|
|
mean value: 0.8068785466281222
|
|
|
|
key: test_jcc
|
|
value: [0.75 0.7037037 0.75 0.875 0.83333333 0.75
|
|
0.7826087 0.73076923 0.75 0.69230769]
|
|
|
|
mean value: 0.7617722655766134
|
|
|
|
key: train_jcc
|
|
value: [0.79723502 0.8 0.79069767 0.79166667 0.79069767 0.79906542
|
|
0.79816514 0.81220657 0.79534884 0.80930233]
|
|
|
|
mean value: 0.7984385332281427
|
|
|
|
MCC on Blind test: 0.52
|
|
|
|
Accuracy on Blind test: 0.79
|
|
|
|
Model_name: K-Nearest Neighbors
|
|
Model func: KNeighborsClassifier()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', KNeighborsClassifier())])
|
|
|
|
key: fit_time
|
|
value: [0.00937772 0.0105772 0.01056385 0.01048541 0.01017141 0.01007199
|
|
0.01010633 0.0100348 0.01004696 0.00996089]
|
|
|
|
mean value: 0.010139656066894532
|
|
|
|
key: score_time
|
|
value: [0.0642364 0.01338673 0.01227045 0.01247025 0.01164246 0.0162518
|
|
0.01173186 0.01165223 0.01177526 0.01154733]
|
|
|
|
mean value: 0.017696475982666014
|
|
|
|
key: test_mcc
|
|
value: [0.44588745 0.52223297 0.38154231 0.40952381 0.69695062 0.28749445
|
|
0.46277515 0.04139187 0.40572206 0.40572206]
|
|
|
|
mean value: 0.4059242743051412
|
|
|
|
key: train_mcc
|
|
value: [0.60529458 0.64881553 0.64252679 0.61776405 0.60874123 0.63405604
|
|
0.62561626 0.62320108 0.64874151 0.59936179]
|
|
|
|
mean value: 0.6254118846679162
|
|
|
|
key: test_accuracy
|
|
value: [0.75 0.78125 0.74193548 0.74193548 0.87096774 0.70967742
|
|
0.74193548 0.61290323 0.74193548 0.74193548]
|
|
|
|
mean value: 0.7434475806451613
|
|
|
|
key: train_accuracy
|
|
value: [0.82857143 0.84642857 0.84341637 0.83274021 0.82918149 0.83985765
|
|
0.83629893 0.83629893 0.84697509 0.82562278]
|
|
|
|
mean value: 0.8365391459074734
|
|
|
|
key: test_fscore
|
|
value: [0.80952381 0.85714286 0.81818182 0.80952381 0.90909091 0.8
|
|
0.78947368 0.73913043 0.81818182 0.81818182]
|
|
|
|
mean value: 0.8168430958819974
|
|
|
|
key: train_fscore
|
|
value: [0.87817259 0.88831169 0.88717949 0.87855297 0.87692308 0.88491049
|
|
0.88020833 0.88265306 0.89002558 0.87338501]
|
|
|
|
mean value: 0.8820322281681761
|
|
|
|
key: test_precision
|
|
value: [0.80952381 0.75 0.7826087 0.80952381 0.86956522 0.75
|
|
0.83333333 0.65384615 0.75 0.75 ]
|
|
|
|
mean value: 0.7758401019270584
|
|
|
|
key: train_precision
|
|
value: [0.8277512 0.855 0.84390244 0.84158416 0.83414634 0.83980583
|
|
0.85353535 0.83980583 0.84878049 0.84079602]
|
|
|
|
mean value: 0.8425107646802061
|
|
|
|
key: test_recall
|
|
value: [0.80952381 1. 0.85714286 0.80952381 0.95238095 0.85714286
|
|
0.75 0.85 0.9 0.9 ]
|
|
|
|
mean value: 0.8685714285714285
|
|
|
|
key: train_recall
|
|
value: [0.93513514 0.92432432 0.93513514 0.91891892 0.92432432 0.93513514
|
|
0.90860215 0.93010753 0.93548387 0.90860215]
|
|
|
|
mean value: 0.9255768671897704
|
|
|
|
key: test_roc_auc
|
|
value: [0.72294372 0.68181818 0.67857143 0.7047619 0.82619048 0.62857143
|
|
0.73863636 0.51590909 0.67727273 0.67727273]
|
|
|
|
mean value: 0.6851948051948051
|
|
|
|
key: train_roc_auc
|
|
value: [0.77809388 0.80953058 0.8009009 0.79279279 0.78507883 0.79569257
|
|
0.8016695 0.79136955 0.80458404 0.78588002]
|
|
|
|
mean value: 0.7945592669282185
|
|
|
|
key: test_jcc
|
|
value: [0.68 0.75 0.69230769 0.68 0.83333333 0.66666667
|
|
0.65217391 0.5862069 0.69230769 0.69230769]
|
|
|
|
mean value: 0.6925303886518279
|
|
|
|
key: train_jcc
|
|
value: [0.78280543 0.79906542 0.79723502 0.78341014 0.78082192 0.79357798
|
|
0.78604651 0.78995434 0.80184332 0.77522936]
|
|
|
|
mean value: 0.7889989436472885
|
|
|
|
MCC on Blind test: 0.38
|
|
|
|
Accuracy on Blind test: 0.73
|
|
|
|
Model_name: SVM
|
|
Model func: SVC(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SVC(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01544476 0.01519156 0.01600599 0.01545453 0.01499534 0.01443052
|
|
0.014117 0.01572728 0.01578069 0.01611376]
|
|
|
|
mean value: 0.015326142311096191
|
|
|
|
key: score_time
|
|
value: [0.01091766 0.01000953 0.01041842 0.01047444 0.01041555 0.01081181
|
|
0.01059771 0.01020598 0.01113415 0.01049471]
|
|
|
|
mean value: 0.010547995567321777
|
|
|
|
key: test_mcc
|
|
value: [0.64764278 0.59458839 0.85465477 0.70992957 0.53924646 0.53924646
|
|
0.79524277 0.48992888 0.56537691 0.48992888]
|
|
|
|
mean value: 0.6225785881062988
|
|
|
|
key: train_mcc
|
|
value: [0.71193719 0.71193719 0.67923746 0.71282383 0.71427799 0.69833241
|
|
0.6770099 0.73511086 0.68521564 0.70157594]
|
|
|
|
mean value: 0.7027458404320319
|
|
|
|
key: test_accuracy
|
|
value: [0.84375 0.8125 0.93548387 0.87096774 0.80645161 0.80645161
|
|
0.90322581 0.77419355 0.80645161 0.77419355]
|
|
|
|
mean value: 0.833366935483871
|
|
|
|
key: train_accuracy
|
|
value: [0.87142857 0.87142857 0.85765125 0.87188612 0.87188612 0.86476868
|
|
0.85765125 0.88256228 0.86120996 0.8683274 ]
|
|
|
|
mean value: 0.8678800203355364
|
|
|
|
key: test_fscore
|
|
value: [0.88372093 0.875 0.95454545 0.91304348 0.86363636 0.86363636
|
|
0.93023256 0.84444444 0.85714286 0.84444444]
|
|
|
|
mean value: 0.8829846894482891
|
|
|
|
key: train_fscore
|
|
value: [0.90954774 0.90954774 0.89949749 0.90909091 0.90954774 0.905
|
|
0.9 0.9164557 0.90225564 0.90680101]
|
|
|
|
mean value: 0.9067743955465448
|
|
|
|
key: test_precision
|
|
value: [0.86363636 0.77777778 0.91304348 0.84 0.82608696 0.82608696
|
|
0.86956522 0.76 0.81818182 0.76 ]
|
|
|
|
mean value: 0.8254378568291612
|
|
|
|
key: train_precision
|
|
value: [0.84976526 0.84976526 0.84037559 0.85308057 0.84976526 0.84186047
|
|
0.8411215 0.86602871 0.84507042 0.85308057]
|
|
|
|
mean value: 0.848991359005567
|
|
|
|
key: test_recall
|
|
value: [0.9047619 1. 1. 1. 0.9047619 0.9047619 1.
|
|
0.95 0.9 0.95 ]
|
|
|
|
mean value: 0.9514285714285714
|
|
|
|
key: train_recall
|
|
value: [0.97837838 0.97837838 0.96756757 0.97297297 0.97837838 0.97837838
|
|
0.96774194 0.97311828 0.96774194 0.96774194]
|
|
|
|
mean value: 0.973039814007556
|
|
|
|
key: test_roc_auc
|
|
value: [0.81601732 0.72727273 0.9 0.8 0.75238095 0.75238095
|
|
0.86363636 0.70227273 0.76818182 0.70227273]
|
|
|
|
mean value: 0.7784415584415585
|
|
|
|
key: train_roc_auc
|
|
value: [0.82076814 0.82076814 0.80670045 0.82502815 0.82252252 0.81210586
|
|
0.8049236 0.83919072 0.81018676 0.82071307]
|
|
|
|
mean value: 0.8182907403371114
|
|
|
|
key: test_jcc
|
|
value: [0.79166667 0.77777778 0.91304348 0.84 0.76 0.76
|
|
0.86956522 0.73076923 0.75 0.73076923]
|
|
|
|
mean value: 0.7923591601635079
|
|
|
|
key: train_jcc
|
|
value: [0.83410138 0.83410138 0.8173516 0.83333333 0.83410138 0.82648402
|
|
0.81818182 0.84579439 0.82191781 0.82949309]
|
|
|
|
mean value: 0.8294860203719092
|
|
|
|
MCC on Blind test: 0.64
|
|
|
|
Accuracy on Blind test: 0.85
|
|
|
|
Model_name: MLP
|
|
Model func: MLPClassifier(max_iter=500, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MLPClassifier(max_iter=500, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.38122225 1.35782647 1.14616299 1.49619937 1.3146069 1.23372722
|
|
1.31832862 1.14557314 1.35891151 1.14897418]
|
|
|
|
mean value: 1.2901532649993896
|
|
|
|
key: score_time
|
|
value: [0.02321529 0.01550245 0.01513743 0.01537633 0.02473903 0.01361775
|
|
0.01516891 0.0152483 0.01541519 0.01537943]
|
|
|
|
mean value: 0.016880011558532713
|
|
|
|
key: test_mcc
|
|
value: [0.87496729 0.73112616 0.77484502 0.69695062 0.70992957 0.72664126
|
|
0.78625916 0.43636364 0.85909091 0.51793973]
|
|
|
|
mean value: 0.7114113350240479
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.9375 0.875 0.90322581 0.87096774 0.87096774 0.87096774
|
|
0.90322581 0.74193548 0.93548387 0.77419355]
|
|
|
|
mean value: 0.8683467741935483
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.95 0.91304348 0.93023256 0.90909091 0.91304348 0.9
|
|
0.92682927 0.8 0.95 0.82051282]
|
|
|
|
mean value: 0.9012752512557687
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 0.84 0.90909091 0.86956522 0.84 0.94736842
|
|
0.9047619 0.8 0.95 0.84210526]
|
|
|
|
mean value: 0.8902891715454644
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.9047619 1. 0.95238095 0.95238095 1. 0.85714286
|
|
0.95 0.8 0.95 0.8 ]
|
|
|
|
mean value: 0.9166666666666666
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.95238095 0.81818182 0.87619048 0.82619048 0.8 0.87857143
|
|
0.88409091 0.71818182 0.92954545 0.76363636]
|
|
|
|
mean value: 0.8446969696969697
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.9047619 0.84 0.86956522 0.83333333 0.84 0.81818182
|
|
0.86363636 0.66666667 0.9047619 0.69565217]
|
|
|
|
mean value: 0.8236559382646339
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.68
|
|
|
|
Accuracy on Blind test: 0.86
|
|
|
|
Model_name: Decision Tree
|
|
Model func: DecisionTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', DecisionTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02104592 0.01962471 0.01438522 0.01636267 0.01385927 0.01335001
|
|
0.01686931 0.01633525 0.01711345 0.0134964 ]
|
|
|
|
mean value: 0.016244220733642577
|
|
|
|
key: score_time
|
|
value: [0.01261473 0.00928545 0.00903654 0.00882006 0.00878167 0.00882888
|
|
0.00888181 0.00890875 0.0089407 0.00888491]
|
|
|
|
mean value: 0.009298348426818847
|
|
|
|
key: test_mcc
|
|
value: [0.87496729 0.87496729 0.93048421 0.78625916 1. 0.86831345
|
|
0.85909091 0.85909091 0.79476958 0.72821908]
|
|
|
|
mean value: 0.8576161893707721
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.9375 0.9375 0.96774194 0.90322581 1. 0.93548387
|
|
0.93548387 0.93548387 0.90322581 0.87096774]
|
|
|
|
mean value: 0.9326612903225806
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.95 0.95 0.97560976 0.92682927 1. 0.95
|
|
0.95 0.95 0.92307692 0.90909091]
|
|
|
|
mean value: 0.9484606856558077
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 1. 1. 0.95 1. 1.
|
|
0.95 0.95 0.94736842 0.83333333]
|
|
|
|
mean value: 0.9630701754385965
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.9047619 0.9047619 0.95238095 0.9047619 1. 0.9047619
|
|
0.95 0.95 0.9 1. ]
|
|
|
|
mean value: 0.9371428571428572
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.95238095 0.95238095 0.97619048 0.90238095 1. 0.95238095
|
|
0.92954545 0.92954545 0.90454545 0.81818182]
|
|
|
|
mean value: 0.9317532467532467
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.9047619 0.9047619 0.95238095 0.86363636 1. 0.9047619
|
|
0.9047619 0.9047619 0.85714286 0.83333333]
|
|
|
|
mean value: 0.9030303030303031
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.86
|
|
|
|
Accuracy on Blind test: 0.94
|
|
|
|
Model_name: Extra Trees
|
|
Model func: ExtraTreesClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreesClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.10296011 0.1014595 0.10357857 0.10675001 0.1063571 0.10523701
|
|
0.10918045 0.10397196 0.10834646 0.10044217]
|
|
|
|
mean value: 0.1048283338546753
|
|
|
|
key: score_time
|
|
value: [0.01882553 0.01874995 0.01823163 0.01881695 0.01868272 0.01878595
|
|
0.01914263 0.01873136 0.01930165 0.01734304]
|
|
|
|
mean value: 0.018661141395568848
|
|
|
|
key: test_mcc
|
|
value: [0.93154098 0.73112616 0.85465477 0.78262379 0.69695062 0.62281846
|
|
0.78625916 0.33300791 0.85909091 0.56697057]
|
|
|
|
mean value: 0.7165043330318461
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.96875 0.875 0.93548387 0.90322581 0.87096774 0.83870968
|
|
0.90322581 0.70967742 0.93548387 0.80645161]
|
|
|
|
mean value: 0.8746975806451612
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.97674419 0.91304348 0.95454545 0.93333333 0.90909091 0.88372093
|
|
0.92682927 0.79069767 0.95 0.86363636]
|
|
|
|
mean value: 0.9101641597857287
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.95454545 0.84 0.91304348 0.875 0.86956522 0.86363636
|
|
0.9047619 0.73913043 0.95 0.79166667]
|
|
|
|
mean value: 0.8701349520045172
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 0.95238095 0.9047619
|
|
0.95 0.85 0.95 0.95 ]
|
|
|
|
mean value: 0.9557142857142857
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.95454545 0.81818182 0.9 0.85 0.82619048 0.80238095
|
|
0.88409091 0.65227273 0.92954545 0.74772727]
|
|
|
|
mean value: 0.8364935064935064
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.95454545 0.84 0.91304348 0.875 0.83333333 0.79166667
|
|
0.86363636 0.65384615 0.9047619 0.76 ]
|
|
|
|
mean value: 0.8389833355050746
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.75
|
|
|
|
Accuracy on Blind test: 0.89
|
|
|
|
Model_name: Extra Tree
|
|
Model func: ExtraTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.00974774 0.00936413 0.00939369 0.00947523 0.00935888 0.0093565
|
|
0.00939965 0.00949359 0.00945926 0.00952435]
|
|
|
|
mean value: 0.00945730209350586
|
|
|
|
key: score_time
|
|
value: [0.00867534 0.008955 0.00860119 0.00857806 0.00861835 0.00871873
|
|
0.00860906 0.00866818 0.00875998 0.00864625]
|
|
|
|
mean value: 0.008683013916015624
|
|
|
|
key: test_mcc
|
|
value: [0.71797362 0.57163505 0.31876536 0.36059915 0.30162467 0.26190476
|
|
0.78625916 0.33300791 0.78625916 0.24110987]
|
|
|
|
mean value: 0.4679138713781422
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.875 0.8125 0.70967742 0.74193548 0.67741935 0.67741935
|
|
0.90322581 0.70967742 0.90322581 0.64516129]
|
|
|
|
mean value: 0.7655241935483871
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.90909091 0.86363636 0.79069767 0.82608696 0.75 0.76190476
|
|
0.92682927 0.79069767 0.92682927 0.71794872]
|
|
|
|
mean value: 0.8263721594525066
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.86956522 0.82608696 0.77272727 0.76 0.78947368 0.76190476
|
|
0.9047619 0.73913043 0.9047619 0.73684211]
|
|
|
|
mean value: 0.806525424232518
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.95238095 0.9047619 0.80952381 0.9047619 0.71428571 0.76190476
|
|
0.95 0.85 0.95 0.7 ]
|
|
|
|
mean value: 0.8497619047619047
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.83982684 0.77056277 0.6547619 0.65238095 0.65714286 0.63095238
|
|
0.88409091 0.65227273 0.88409091 0.62272727]
|
|
|
|
mean value: 0.7248809523809524
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.83333333 0.76 0.65384615 0.7037037 0.6 0.61538462
|
|
0.86363636 0.65384615 0.86363636 0.56 ]
|
|
|
|
mean value: 0.7107386687386688
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.36
|
|
|
|
Accuracy on Blind test: 0.69
|
|
|
|
Model_name: Random Forest
|
|
Model func: RandomForestClassifier(n_estimators=1000, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(n_estimators=1000, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.40060687 1.47927785 1.49632025 1.38435006 1.42914748 1.39843106
|
|
1.60974932 1.43152332 1.42072606 1.414078 ]
|
|
|
|
mean value: 1.4464210271835327
|
|
|
|
key: score_time
|
|
value: [0.09651136 0.09766483 0.09662008 0.09672141 0.09631658 0.09969044
|
|
0.11077142 0.09088016 0.09754944 0.0947938 ]
|
|
|
|
mean value: 0.09775195121765137
|
|
|
|
key: test_mcc
|
|
value: [1. 0.8643122 0.85465477 0.78262379 0.92687157 0.85238095
|
|
0.86243936 0.71390814 0.93048421 0.79524277]
|
|
|
|
mean value: 0.8582917769162332
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [1. 0.9375 0.93548387 0.90322581 0.96774194 0.93548387
|
|
0.93548387 0.87096774 0.96774194 0.90322581]
|
|
|
|
mean value: 0.9356854838709677
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [1. 0.95454545 0.95454545 0.93333333 0.97674419 0.95238095
|
|
0.95238095 0.9047619 0.97560976 0.93023256]
|
|
|
|
mean value: 0.9534534552231659
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 0.91304348 0.91304348 0.875 0.95454545 0.95238095
|
|
0.90909091 0.86363636 0.95238095 0.86956522]
|
|
|
|
mean value: 0.9202686805947675
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 1. 0.95238095
|
|
1. 0.95 1. 1. ]
|
|
|
|
mean value: 0.9902380952380953
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [1. 0.90909091 0.9 0.85 0.95 0.92619048
|
|
0.90909091 0.83863636 0.95454545 0.86363636]
|
|
|
|
mean value: 0.9101190476190476
|
|
|
|
key: train_roc_auc
|
|
value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [1. 0.91304348 0.91304348 0.875 0.95454545 0.90909091
|
|
0.90909091 0.82608696 0.95238095 0.86956522]
|
|
|
|
mean value: 0.9121847355543008
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.91
|
|
|
|
Accuracy on Blind test: 0.96
|
|
|
|
Model_name: Random Forest2
|
|
Model func: RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000...05', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10,
|
|
oob_score=True, random_state=42))])
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
|
|
key: fit_time
|
|
value: [1.8423326 0.92857409 1.05132771 0.89933038 0.9478097 0.90112305
|
|
0.93446851 1.02135634 0.94023633 0.90203786]
|
|
|
|
mean value: 1.036859655380249
|
|
|
|
key: score_time
|
|
value: [0.26423621 0.21858096 0.18165159 0.18472934 0.222363 0.13671899
|
|
0.13786745 0.13727784 0.21312284 0.21497941]
|
|
|
|
mean value: 0.19115276336669923
|
|
|
|
key: test_mcc
|
|
value: [0.93154098 0.8643122 0.85465477 0.69695062 0.85465477 0.69695062
|
|
0.86243936 0.71390814 0.86243936 0.72821908]
|
|
|
|
mean value: 0.8066069905007348
|
|
|
|
key: train_mcc
|
|
value: [0.93692544 0.95258202 0.9296276 0.94513672 0.94513672 0.94513672
|
|
0.95266247 0.95266247 0.96050414 0.944838 ]
|
|
|
|
mean value: 0.9465212313571784
|
|
|
|
key: test_accuracy
|
|
value: [0.96875 0.9375 0.93548387 0.87096774 0.93548387 0.87096774
|
|
0.93548387 0.87096774 0.93548387 0.87096774]
|
|
|
|
mean value: 0.9132056451612903
|
|
|
|
key: train_accuracy
|
|
value: [0.97142857 0.97857143 0.96797153 0.97508897 0.97508897 0.97508897
|
|
0.97864769 0.97864769 0.98220641 0.97508897]
|
|
|
|
mean value: 0.9757829181494662
|
|
|
|
key: test_fscore
|
|
value: [0.97674419 0.95454545 0.95454545 0.90909091 0.95454545 0.90909091
|
|
0.95238095 0.9047619 0.95238095 0.90909091]
|
|
|
|
mean value: 0.9377177086479411
|
|
|
|
key: train_fscore
|
|
value: [0.97883598 0.98404255 0.9762533 0.98143236 0.98143236 0.98143236
|
|
0.98412698 0.98412698 0.9867374 0.98153034]
|
|
|
|
mean value: 0.9819950624201007
|
|
|
|
key: test_precision
|
|
value: [0.95454545 0.91304348 0.91304348 0.86956522 0.91304348 0.86956522
|
|
0.90909091 0.86363636 0.90909091 0.83333333]
|
|
|
|
mean value: 0.8947957839262187
|
|
|
|
key: train_precision
|
|
value: [0.95854922 0.96858639 0.95360825 0.96354167 0.96354167 0.96354167
|
|
0.96875 0.96875 0.97382199 0.96373057]
|
|
|
|
mean value: 0.9646421417132145
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 0.95238095 1. 0.95238095
|
|
1. 0.95 1. 1. ]
|
|
|
|
mean value: 0.9854761904761905
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.95454545 0.90909091 0.9 0.82619048 0.9 0.82619048
|
|
0.90909091 0.83863636 0.90909091 0.81818182]
|
|
|
|
mean value: 0.8791017316017316
|
|
|
|
key: train_roc_auc
|
|
value: [0.95789474 0.96842105 0.953125 0.96354167 0.96354167 0.96354167
|
|
0.96842105 0.96842105 0.97368421 0.96315789]
|
|
|
|
mean value: 0.964375
|
|
|
|
key: test_jcc
|
|
value: [0.95454545 0.91304348 0.91304348 0.83333333 0.91304348 0.83333333
|
|
0.90909091 0.82608696 0.90909091 0.83333333]
|
|
|
|
mean value: 0.8837944664031621
|
|
|
|
key: train_jcc
|
|
value: [0.95854922 0.96858639 0.95360825 0.96354167 0.96354167 0.96354167
|
|
0.96875 0.96875 0.97382199 0.96373057]
|
|
|
|
mean value: 0.9646421417132145
|
|
|
|
MCC on Blind test: 0.9
|
|
|
|
Accuracy on Blind test: 0.95
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.02401733 0.00943089 0.00983357 0.01020646 0.00944304 0.01021004
|
|
0.0097549 0.00990272 0.00948167 0.00955868]
|
|
|
|
mean value: 0.011183929443359376
|
|
|
|
key: score_time
|
|
value: [0.01194978 0.00885653 0.00887227 0.00913167 0.00884962 0.00915885
|
|
0.0091846 0.00890708 0.00913548 0.00879622]
|
|
|
|
mean value: 0.009284210205078126
|
|
|
|
key: test_mcc
|
|
value: [0.58441558 0.41281273 0.55714286 0.78262379 0.69695062 0.55714286
|
|
0.64203411 0.48992888 0.56537691 0.40572206]
|
|
|
|
mean value: 0.5694150392239126
|
|
|
|
key: train_mcc
|
|
value: [0.63966715 0.64841162 0.63494589 0.63445555 0.63494589 0.65151226
|
|
0.64020793 0.67456536 0.64134835 0.66575682]
|
|
|
|
mean value: 0.6465816821661099
|
|
|
|
key: test_accuracy
|
|
value: [0.8125 0.75 0.80645161 0.90322581 0.87096774 0.80645161
|
|
0.83870968 0.77419355 0.80645161 0.74193548]
|
|
|
|
mean value: 0.8110887096774193
|
|
|
|
key: train_accuracy
|
|
value: [0.84285714 0.84642857 0.83985765 0.83985765 0.83985765 0.84697509
|
|
0.84341637 0.85765125 0.84341637 0.85409253]
|
|
|
|
mean value: 0.8454410269445857
|
|
|
|
key: test_fscore
|
|
value: [0.85714286 0.82608696 0.85714286 0.93333333 0.90909091 0.85714286
|
|
0.87804878 0.84444444 0.85714286 0.81818182]
|
|
|
|
mean value: 0.8637757670631477
|
|
|
|
key: train_fscore
|
|
value: [0.88717949 0.88888889 0.88311688 0.88372093 0.88311688 0.88831169
|
|
0.8877551 0.89637306 0.88601036 0.89460154]
|
|
|
|
mean value: 0.8879074824992776
|
|
|
|
key: test_precision
|
|
value: [0.85714286 0.76 0.85714286 0.875 0.86956522 0.85714286
|
|
0.85714286 0.76 0.81818182 0.75 ]
|
|
|
|
mean value: 0.8261318464144551
|
|
|
|
key: train_precision
|
|
value: [0.84390244 0.85148515 0.85 0.84653465 0.85 0.855
|
|
0.84466019 0.865 0.855 0.85714286]
|
|
|
|
mean value: 0.8518725292322202
|
|
|
|
key: test_recall
|
|
value: [0.85714286 0.9047619 0.85714286 1. 0.95238095 0.85714286
|
|
0.9 0.95 0.9 0.9 ]
|
|
|
|
mean value: 0.9078571428571428
|
|
|
|
key: train_recall
|
|
value: [0.93513514 0.92972973 0.91891892 0.92432432 0.91891892 0.92432432
|
|
0.93548387 0.93010753 0.91935484 0.93548387]
|
|
|
|
mean value: 0.9271781458878233
|
|
|
|
key: test_roc_auc
|
|
value: [0.79220779 0.67965368 0.77857143 0.85 0.82619048 0.77857143
|
|
0.81363636 0.70227273 0.76818182 0.67727273]
|
|
|
|
mean value: 0.7666558441558441
|
|
|
|
key: train_roc_auc
|
|
value: [0.79914651 0.80697013 0.80320946 0.80070383 0.80320946 0.8111205
|
|
0.79932088 0.8229485 0.80704584 0.81511036]
|
|
|
|
mean value: 0.8068785466281222
|
|
|
|
key: test_jcc
|
|
value: [0.75 0.7037037 0.75 0.875 0.83333333 0.75
|
|
0.7826087 0.73076923 0.75 0.69230769]
|
|
|
|
mean value: 0.7617722655766134
|
|
|
|
key: train_jcc
|
|
value: [0.79723502 0.8 0.79069767 0.79166667 0.79069767 0.79906542
|
|
0.79816514 0.81220657 0.79534884 0.80930233]
|
|
|
|
mean value: 0.7984385332281427
|
|
|
|
MCC on Blind test: 0.52
|
|
|
|
Accuracy on Blind test: 0.79
|
|
|
|
Model_name: XGBoost
|
|
Model func: XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000...
|
|
interaction_constraints=None, learning_rate=None,
|
|
max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan,
|
|
monotone_constraints=None, n_estimators=100,
|
|
n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None,
|
|
reg_lambda=None, scale_pos_weight=None,
|
|
subsample=None, tree_method=None,
|
|
use_label_encoder=False,
|
|
validate_parameters=None, verbosity=0))])
|
|
|
|
key: fit_time
|
|
value: [0.09754491 0.05398941 0.05331159 0.07382202 0.05403852 0.07611632
|
|
0.05666065 0.05597878 0.05435252 0.06311226]
|
|
|
|
mean value: 0.06389269828796387
|
|
|
|
key: score_time
|
|
value: [0.01084304 0.01037145 0.01035023 0.01094937 0.01031685 0.01092744
|
|
0.0106082 0.01048756 0.01035881 0.01045704]
|
|
|
|
mean value: 0.010566997528076171
|
|
|
|
key: test_mcc
|
|
value: [1. 1. 1. 0.85238095 0.93048421 0.93048421
|
|
0.85909091 0.93048421 0.85909091 0.72821908]
|
|
|
|
mean value: 0.9090234483012603
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [1. 1. 1. 0.93548387 0.96774194 0.96774194
|
|
0.93548387 0.96774194 0.93548387 0.87096774]
|
|
|
|
mean value: 0.9580645161290322
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [1. 1. 1. 0.95238095 0.97560976 0.97560976
|
|
0.95 0.97560976 0.95 0.90909091]
|
|
|
|
mean value: 0.9688301129764545
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 1. 1. 0.95238095 1. 1.
|
|
0.95 0.95238095 0.95 0.83333333]
|
|
|
|
mean value: 0.9638095238095238
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 0.95238095 0.95238095 0.95238095
|
|
0.95 1. 0.95 1. ]
|
|
|
|
mean value: 0.9757142857142856
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [1. 1. 1. 0.92619048 0.97619048 0.97619048
|
|
0.92954545 0.95454545 0.92954545 0.81818182]
|
|
|
|
mean value: 0.951038961038961
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [1. 1. 1. 0.90909091 0.95238095 0.95238095
|
|
0.9047619 0.95238095 0.9047619 0.83333333]
|
|
|
|
mean value: 0.9409090909090909
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.88
|
|
|
|
Accuracy on Blind test: 0.95
|
|
|
|
Model_name: LDA
|
|
Model func: LinearDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LinearDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.04327488 0.05089211 0.07965779 0.06433558 0.03197861 0.06568432
|
|
0.03882575 0.05931139 0.10036445 0.07465005]
|
|
|
|
mean value: 0.06089749336242676
|
|
|
|
key: score_time
|
|
value: [0.01218843 0.02063107 0.02385974 0.01202154 0.01201749 0.01207352
|
|
0.02089715 0.02120233 0.02075076 0.02160144]
|
|
|
|
mean value: 0.01772434711456299
|
|
|
|
key: test_mcc
|
|
value: [0.93154098 0.79844727 0.78625916 0.77484502 0.78625916 0.64203411
|
|
0.79476958 0.54627358 0.79524277 0.72821908]
|
|
|
|
mean value: 0.7583890697728666
|
|
|
|
key: train_mcc
|
|
value: [0.9760722 0.9760722 0.98417793 0.98417793 0.98417793 0.99210029
|
|
0.97611544 0.98409734 0.98409734 0.98409734]
|
|
|
|
mean value: 0.982518594350976
|
|
|
|
key: test_accuracy
|
|
value: [0.96875 0.90625 0.90322581 0.90322581 0.90322581 0.83870968
|
|
0.90322581 0.77419355 0.90322581 0.87096774]
|
|
|
|
mean value: 0.8875
|
|
|
|
key: train_accuracy
|
|
value: [0.98928571 0.98928571 0.99288256 0.99288256 0.99288256 0.99644128
|
|
0.98932384 0.99288256 0.99288256 0.99288256]
|
|
|
|
mean value: 0.9921631926792069
|
|
|
|
key: test_fscore
|
|
value: [0.97674419 0.92682927 0.92682927 0.93023256 0.92682927 0.87804878
|
|
0.92307692 0.81081081 0.93023256 0.90909091]
|
|
|
|
mean value: 0.9138724530670078
|
|
|
|
key: train_fscore
|
|
value: [0.99191375 0.99191375 0.99459459 0.99459459 0.99459459 0.99730458
|
|
0.9919571 0.99462366 0.99462366 0.99462366]
|
|
|
|
mean value: 0.9940743931555058
|
|
|
|
key: test_precision
|
|
value: [0.95454545 0.95 0.95 0.90909091 0.95 0.9
|
|
0.94736842 0.88235294 0.86956522 0.83333333]
|
|
|
|
mean value: 0.9146256276590103
|
|
|
|
key: train_precision
|
|
value: [0.98924731 0.98924731 0.99459459 0.99459459 0.99459459 0.99462366
|
|
0.98930481 0.99462366 0.99462366 0.99462366]
|
|
|
|
mean value: 0.9930077843929837
|
|
|
|
key: test_recall
|
|
value: [1. 0.9047619 0.9047619 0.95238095 0.9047619 0.85714286
|
|
0.9 0.75 1. 1. ]
|
|
|
|
mean value: 0.9173809523809524
|
|
|
|
key: train_recall
|
|
value: [0.99459459 0.99459459 0.99459459 0.99459459 0.99459459 1.
|
|
0.99462366 0.99462366 0.99462366 0.99462366]
|
|
|
|
mean value: 0.9951467596628887
|
|
|
|
key: test_roc_auc
|
|
value: [0.95454545 0.90692641 0.90238095 0.87619048 0.90238095 0.82857143
|
|
0.90454545 0.78409091 0.86363636 0.81818182]
|
|
|
|
mean value: 0.8741450216450216
|
|
|
|
key: train_roc_auc
|
|
value: [0.98677098 0.98677098 0.99208896 0.99208896 0.99208896 0.99479167
|
|
0.98678551 0.99204867 0.99204867 0.99204867]
|
|
|
|
mean value: 0.990753204392848
|
|
|
|
key: test_jcc
|
|
value: [0.95454545 0.86363636 0.86363636 0.86956522 0.86363636 0.7826087
|
|
0.85714286 0.68181818 0.86956522 0.83333333]
|
|
|
|
mean value: 0.8439488048183701
|
|
|
|
key: train_jcc
|
|
value: [0.98395722 0.98395722 0.98924731 0.98924731 0.98924731 0.99462366
|
|
0.98404255 0.98930481 0.98930481 0.98930481]
|
|
|
|
mean value: 0.9882237021594686
|
|
|
|
MCC on Blind test: 0.8
|
|
|
|
Accuracy on Blind test: 0.91
|
|
|
|
Model_name: Multinomial
|
|
Model func: MultinomialNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MultinomialNB())])
|
|
|
|
key: fit_time
|
|
value: [0.02474356 0.00953603 0.0093832 0.00936699 0.00912547 0.00907516
|
|
0.00918412 0.00904965 0.00911117 0.0090394 ]
|
|
|
|
mean value: 0.010761475563049317
|
|
|
|
key: score_time
|
|
value: [0.00934124 0.00899363 0.00895071 0.00882339 0.0085876 0.00857353
|
|
0.00863934 0.00857663 0.00860953 0.00857806]
|
|
|
|
mean value: 0.008767366409301758
|
|
|
|
key: test_mcc
|
|
value: [0.44588745 0.39072951 0.85465477 0.62281846 0.69695062 0.64203411
|
|
0.71390814 0.4870862 0.64203411 0.48992888]
|
|
|
|
mean value: 0.5986032237928653
|
|
|
|
key: train_mcc
|
|
value: [0.67373058 0.63158537 0.67624759 0.63494589 0.65206677 0.64299145
|
|
0.6421061 0.65286643 0.69099047 0.6421061 ]
|
|
|
|
mean value: 0.6539636746974534
|
|
|
|
key: test_accuracy
|
|
value: [0.75 0.71875 0.93548387 0.83870968 0.87096774 0.83870968
|
|
0.87096774 0.77419355 0.83870968 0.77419355]
|
|
|
|
mean value: 0.8210685483870968
|
|
|
|
key: train_accuracy
|
|
value: [0.85714286 0.83928571 0.85765125 0.83985765 0.84697509 0.84341637
|
|
0.84341637 0.84697509 0.86476868 0.84341637]
|
|
|
|
mean value: 0.8482905439755973
|
|
|
|
key: test_fscore
|
|
value: [0.80952381 0.7804878 0.95454545 0.88372093 0.90909091 0.87804878
|
|
0.9047619 0.8372093 0.87804878 0.84444444]
|
|
|
|
mean value: 0.867988212077832
|
|
|
|
key: train_fscore
|
|
value: [0.89637306 0.88372093 0.89637306 0.88311688 0.88772846 0.88601036
|
|
0.88541667 0.88654354 0.9025641 0.88541667]
|
|
|
|
mean value: 0.8893263721080894
|
|
|
|
key: test_precision
|
|
value: [0.80952381 0.8 0.91304348 0.86363636 0.86956522 0.9
|
|
0.86363636 0.7826087 0.85714286 0.76 ]
|
|
|
|
mean value: 0.8419156785243742
|
|
|
|
key: train_precision
|
|
value: [0.86069652 0.84653465 0.86069652 0.85 0.85858586 0.85074627
|
|
0.85858586 0.87046632 0.8627451 0.85858586]
|
|
|
|
mean value: 0.8577642951988248
|
|
|
|
key: test_recall
|
|
value: [0.80952381 0.76190476 1. 0.9047619 0.95238095 0.85714286
|
|
0.95 0.9 0.9 0.95 ]
|
|
|
|
mean value: 0.8985714285714286
|
|
|
|
key: train_recall
|
|
value: [0.93513514 0.92432432 0.93513514 0.91891892 0.91891892 0.92432432
|
|
0.91397849 0.90322581 0.94623656 0.91397849]
|
|
|
|
mean value: 0.9234176111595467
|
|
|
|
key: test_roc_auc
|
|
value: [0.72294372 0.6991342 0.9 0.80238095 0.82619048 0.82857143
|
|
0.83863636 0.72272727 0.81363636 0.70227273]
|
|
|
|
mean value: 0.7856493506493506
|
|
|
|
key: train_roc_auc
|
|
value: [0.82019915 0.79900427 0.82173423 0.80320946 0.81362613 0.80591216
|
|
0.80962083 0.82003396 0.82574986 0.80962083]
|
|
|
|
mean value: 0.8128710862815277
|
|
|
|
key: test_jcc
|
|
value: [0.68 0.64 0.91304348 0.79166667 0.83333333 0.7826087
|
|
0.82608696 0.72 0.7826087 0.73076923]
|
|
|
|
mean value: 0.7700117056856187
|
|
|
|
key: train_jcc
|
|
value: [0.81220657 0.79166667 0.81220657 0.79069767 0.79812207 0.79534884
|
|
0.79439252 0.79620853 0.82242991 0.79439252]
|
|
|
|
mean value: 0.8007671873638894
|
|
|
|
MCC on Blind test: 0.57
|
|
|
|
Accuracy on Blind test: 0.81
|
|
|
|
Model_name: Passive Aggresive
|
|
Model func: PassiveAggressiveClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
PassiveAggressiveClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01220751 0.01751304 0.01987648 0.01620412 0.01534367 0.02069497
|
|
0.01594377 0.0216136 0.01784015 0.0183599 ]
|
|
|
|
mean value: 0.01755971908569336
|
|
|
|
key: score_time
|
|
value: [0.00858617 0.01100492 0.01094866 0.01182079 0.01149249 0.01157546
|
|
0.01156306 0.01166034 0.01157475 0.01156259]
|
|
|
|
mean value: 0.011178922653198243
|
|
|
|
key: test_mcc
|
|
value: [0.93154098 0.8643122 0.76041521 0.32857143 0.71269665 0.6681531
|
|
0.71818182 0.73603286 0.79476958 0.57727273]
|
|
|
|
mean value: 0.709194655123876
|
|
|
|
key: train_mcc
|
|
value: [0.92133213 0.98411246 0.94698073 0.52715368 0.74626689 0.83439572
|
|
0.8969264 1. 0.95325088 0.96021134]
|
|
|
|
mean value: 0.8770630246730166
|
|
|
|
key: test_accuracy
|
|
value: [0.96875 0.9375 0.87096774 0.58064516 0.83870968 0.80645161
|
|
0.87096774 0.87096774 0.90322581 0.80645161]
|
|
|
|
mean value: 0.8454637096774194
|
|
|
|
key: train_accuracy
|
|
value: [0.96428571 0.99285714 0.97508897 0.69039146 0.86476868 0.91459075
|
|
0.95373665 1. 0.97864769 0.98220641]
|
|
|
|
mean value: 0.9316573462125064
|
|
|
|
key: test_fscore
|
|
value: [0.97674419 0.95454545 0.89473684 0.58064516 0.86486486 0.83333333
|
|
0.9 0.89473684 0.92307692 0.85 ]
|
|
|
|
mean value: 0.8672683607367936
|
|
|
|
key: train_fscore
|
|
value: [0.97368421 0.99462366 0.98071625 0.69257951 0.88690476 0.93063584
|
|
0.96495957 1. 0.98369565 0.98666667]
|
|
|
|
mean value: 0.9394466112812958
|
|
|
|
key: test_precision
|
|
value: [0.95454545 0.91304348 1. 0.9 1. 1.
|
|
0.9 0.94444444 0.94736842 0.85 ]
|
|
|
|
mean value: 0.9409401798303401
|
|
|
|
key: train_precision
|
|
value: [0.94871795 0.98930481 1. 1. 0.98675497 1.
|
|
0.96756757 1. 0.99450549 0.97883598]
|
|
|
|
mean value: 0.9865686769348632
|
|
|
|
key: test_recall
|
|
value: [1. 1. 0.80952381 0.42857143 0.76190476 0.71428571
|
|
0.9 0.85 0.9 0.85 ]
|
|
|
|
mean value: 0.8214285714285714
|
|
|
|
key: train_recall
|
|
value: [1. 1. 0.96216216 0.52972973 0.80540541 0.87027027
|
|
0.96236559 1. 0.97311828 0.99462366]
|
|
|
|
mean value: 0.9097675094449288
|
|
|
|
key: test_roc_auc
|
|
value: [0.95454545 0.90909091 0.9047619 0.66428571 0.88095238 0.85714286
|
|
0.85909091 0.87954545 0.90454545 0.78863636]
|
|
|
|
mean value: 0.8602597402597403
|
|
|
|
key: train_roc_auc
|
|
value: [0.94736842 0.98947368 0.98108108 0.76486486 0.89228604 0.93513514
|
|
0.94960385 1. 0.98129598 0.9762592 ]
|
|
|
|
mean value: 0.941736824897903
|
|
|
|
key: test_jcc
|
|
value: [0.95454545 0.91304348 0.80952381 0.40909091 0.76190476 0.71428571
|
|
0.81818182 0.80952381 0.85714286 0.73913043]
|
|
|
|
mean value: 0.7786373047242613
|
|
|
|
key: train_jcc
|
|
value: [0.94871795 0.98930481 0.96216216 0.52972973 0.79679144 0.87027027
|
|
0.93229167 1. 0.96791444 0.97368421]
|
|
|
|
mean value: 0.8970866683260259
|
|
|
|
MCC on Blind test: 0.57
|
|
|
|
Accuracy on Blind test: 0.75
|
|
|
|
Model_name: Stochastic GDescent
|
|
Model func: SGDClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SGDClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01702142 0.01537132 0.01510477 0.01628637 0.01565433 0.0167551
|
|
0.0152967 0.0162673 0.01603389 0.01621771]
|
|
|
|
mean value: 0.016000890731811525
|
|
|
|
key: score_time
|
|
value: [0.0117135 0.01158285 0.01149917 0.01162124 0.01162434 0.01160312
|
|
0.0116601 0.01162314 0.01154995 0.01157784]
|
|
|
|
mean value: 0.011605525016784668
|
|
|
|
key: test_mcc
|
|
value: [0.93154098 0.79772404 0.67215385 0.85465477 0.51176632 0.78625916
|
|
0.78625916 0.5375332 0.85909091 0.79524277]
|
|
|
|
mean value: 0.7532225152872797
|
|
|
|
key: train_mcc
|
|
value: [0.9284967 0.83638515 0.85422716 0.9218965 0.68288051 0.98417793
|
|
0.91545327 0.89775184 0.93617969 0.9136949 ]
|
|
|
|
mean value: 0.8871143639501542
|
|
|
|
key: test_accuracy
|
|
value: [0.96875 0.90625 0.83870968 0.93548387 0.67741935 0.90322581
|
|
0.90322581 0.74193548 0.93548387 0.90322581]
|
|
|
|
mean value: 0.8713709677419355
|
|
|
|
key: train_accuracy
|
|
value: [0.96785714 0.925 0.92882562 0.96441281 0.81494662 0.99288256
|
|
0.96085409 0.95017794 0.97153025 0.96085409]
|
|
|
|
mean value: 0.9437341128622267
|
|
|
|
key: test_fscore
|
|
value: [0.97674419 0.93333333 0.87179487 0.95454545 0.6875 0.92682927
|
|
0.92682927 0.76470588 0.95 0.93023256]
|
|
|
|
mean value: 0.8922514822798013
|
|
|
|
key: train_fscore
|
|
value: [0.97612732 0.94629156 0.94350282 0.97368421 0.83647799 0.99459459
|
|
0.96986301 0.96089385 0.97860963 0.97127937]
|
|
|
|
mean value: 0.9551324365942089
|
|
|
|
key: test_precision
|
|
value: [0.95454545 0.875 0.94444444 0.91304348 1. 0.95
|
|
0.9047619 0.92857143 0.95 0.86956522]
|
|
|
|
mean value: 0.9289931927975406
|
|
|
|
key: train_precision
|
|
value: [0.95833333 0.89805825 0.98816568 0.94871795 1. 0.99459459
|
|
0.98882682 1. 0.97340426 0.94416244]
|
|
|
|
mean value: 0.9694263317056264
|
|
|
|
key: test_recall
|
|
value: [1. 1. 0.80952381 1. 0.52380952 0.9047619
|
|
0.95 0.65 0.95 1. ]
|
|
|
|
mean value: 0.8788095238095238
|
|
|
|
key: train_recall
|
|
value: [0.99459459 1. 0.9027027 1. 0.71891892 0.99459459
|
|
0.9516129 0.92473118 0.98387097 1. ]
|
|
|
|
mean value: 0.9471025864574252
|
|
|
|
key: test_roc_auc
|
|
value: [0.95454545 0.86363636 0.8547619 0.9 0.76190476 0.90238095
|
|
0.88409091 0.77954545 0.92954545 0.86363636]
|
|
|
|
mean value: 0.8694047619047619
|
|
|
|
key: train_roc_auc
|
|
value: [0.95519203 0.88947368 0.94093468 0.94791667 0.85945946 0.99208896
|
|
0.96528014 0.96236559 0.96561969 0.94210526]
|
|
|
|
mean value: 0.9420436177901161
|
|
|
|
key: test_jcc
|
|
value: [0.95454545 0.875 0.77272727 0.91304348 0.52380952 0.86363636
|
|
0.86363636 0.61904762 0.9047619 0.86956522]
|
|
|
|
mean value: 0.8159773197816677
|
|
|
|
key: train_jcc
|
|
value: [0.95336788 0.89805825 0.89304813 0.94871795 0.71891892 0.98924731
|
|
0.94148936 0.92473118 0.95811518 0.94416244]
|
|
|
|
mean value: 0.9169856600174047
|
|
|
|
MCC on Blind test: 0.7
|
|
|
|
Accuracy on Blind test: 0.86
|
|
|
|
Model_name: AdaBoost Classifier
|
|
Model func: AdaBoostClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', AdaBoostClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.14682198 0.13001108 0.12968946 0.130548 0.13054299 0.13108706
|
|
0.13012147 0.14023495 0.13049984 0.13098478]
|
|
|
|
mean value: 0.13305416107177734
|
|
|
|
key: score_time
|
|
value: [0.01479316 0.0149045 0.01485991 0.01515651 0.01488829 0.01515222
|
|
0.01562381 0.01490235 0.0150342 0.01483464]
|
|
|
|
mean value: 0.015014958381652833
|
|
|
|
key: test_mcc
|
|
value: [0.93435318 0.93435318 1. 0.92687157 0.93048421 0.93048421
|
|
0.93048421 0.85909091 0.93048421 0.72821908]
|
|
|
|
mean value: 0.9104824771523825
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.96875 0.96875 1. 0.96774194 0.96774194 0.96774194
|
|
0.96774194 0.93548387 0.96774194 0.87096774]
|
|
|
|
mean value: 0.958266129032258
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.97560976 0.97560976 1. 0.97674419 0.97560976 0.97560976
|
|
0.97560976 0.95 0.97560976 0.90909091]
|
|
|
|
mean value: 0.9689493631722786
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 1. 1. 0.95454545 1. 1.
|
|
0.95238095 0.95 0.95238095 0.83333333]
|
|
|
|
mean value: 0.9642640692640693
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.95238095 0.95238095 1. 1. 0.95238095 0.95238095
|
|
1. 0.95 1. 1. ]
|
|
|
|
mean value: 0.9759523809523809
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.97619048 0.97619048 1. 0.95 0.97619048 0.97619048
|
|
0.95454545 0.92954545 0.95454545 0.81818182]
|
|
|
|
mean value: 0.9511580086580086
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.95238095 0.95238095 1. 0.95454545 0.95238095 0.95238095
|
|
0.95238095 0.9047619 0.95238095 0.83333333]
|
|
|
|
mean value: 0.9406926406926407
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.9
|
|
|
|
Accuracy on Blind test: 0.95
|
|
|
|
Model_name: Bagging Classifier
|
|
Model func: BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
BaggingClassifier(n_jobs=10, oob_score=True,
|
|
random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.04166436 0.03705072 0.04192185 0.03513145 0.04678941 0.04064345
|
|
0.04421425 0.05140901 0.0332458 0.04983377]
|
|
|
|
mean value: 0.04219040870666504
|
|
|
|
key: score_time
|
|
value: [0.01687479 0.01946425 0.02428985 0.01744056 0.02443838 0.02561164
|
|
0.02391243 0.02650714 0.01740432 0.01979828]
|
|
|
|
mean value: 0.02157416343688965
|
|
|
|
key: test_mcc
|
|
value: [1. 0.93435318 1. 0.92687157 0.93048421 0.93048421
|
|
0.93048421 0.79476958 0.79476958 0.72821908]
|
|
|
|
mean value: 0.8970435636403364
|
|
|
|
key: train_mcc
|
|
value: [0.9920858 0.98411246 0.99213963 1. 1. 0.99210029
|
|
1. 1. 0.99205967 0.99205967]
|
|
|
|
mean value: 0.9944557518725569
|
|
|
|
key: test_accuracy
|
|
value: [1. 0.96875 1. 0.96774194 0.96774194 0.96774194
|
|
0.96774194 0.90322581 0.90322581 0.87096774]
|
|
|
|
mean value: 0.9517137096774193
|
|
|
|
key: train_accuracy
|
|
value: [0.99642857 0.99285714 0.99644128 1. 1. 0.99644128
|
|
1. 1. 0.99644128 0.99644128]
|
|
|
|
mean value: 0.9975050838840874
|
|
|
|
key: test_fscore
|
|
value: [1. 0.97560976 1. 0.97674419 0.97560976 0.97560976
|
|
0.97560976 0.92307692 0.92307692 0.90909091]
|
|
|
|
mean value: 0.9634427965681511
|
|
|
|
key: train_fscore
|
|
value: [0.99728997 0.99462366 0.99728997 1. 1. 0.99730458
|
|
1. 1. 0.99731903 0.99731903]
|
|
|
|
mean value: 0.9981146253628773
|
|
|
|
key: test_precision
|
|
value: [1. 1. 1. 0.95454545 1. 1.
|
|
0.95238095 0.94736842 0.94736842 0.83333333]
|
|
|
|
mean value: 0.9634996582365003
|
|
|
|
key: train_precision
|
|
value: [1. 0.98930481 1. 1. 1. 0.99462366
|
|
1. 1. 0.99465241 0.99465241]
|
|
|
|
mean value: 0.9973233281582428
|
|
|
|
key: test_recall
|
|
value: [1. 0.95238095 1. 1. 0.95238095 0.95238095
|
|
1. 0.9 0.9 1. ]
|
|
|
|
mean value: 0.9657142857142857
|
|
|
|
key: train_recall
|
|
value: [0.99459459 1. 0.99459459 1. 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9989189189189189
|
|
|
|
key: test_roc_auc
|
|
value: [1. 0.97619048 1. 0.95 0.97619048 0.97619048
|
|
0.95454545 0.90454545 0.90454545 0.81818182]
|
|
|
|
mean value: 0.946038961038961
|
|
|
|
key: train_roc_auc
|
|
value: [0.9972973 0.98947368 0.9972973 1. 1. 0.99479167
|
|
1. 1. 0.99473684 0.99473684]
|
|
|
|
mean value: 0.9968333629682314
|
|
|
|
key: test_jcc
|
|
value: [1. 0.95238095 1. 0.95454545 0.95238095 0.95238095
|
|
0.95238095 0.85714286 0.85714286 0.83333333]
|
|
|
|
mean value: 0.9311688311688311
|
|
|
|
key: train_jcc
|
|
value: [0.99459459 0.98930481 0.99459459 1. 1. 0.99462366
|
|
1. 1. 0.99465241 0.99465241]
|
|
|
|
mean value: 0.9962422470771617
|
|
|
|
MCC on Blind test: 0.84
|
|
|
|
Accuracy on Blind test: 0.93
|
|
|
|
Model_name: Gaussian Process
|
|
Model func: GaussianProcessClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianProcessClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.03371096 0.06597567 0.09045339 0.05891442 0.08815241 0.07290912
|
|
0.1127162 0.09216666 0.08394241 0.04576588]
|
|
|
|
mean value: 0.07447071075439453
|
|
|
|
key: score_time
|
|
value: [0.01326203 0.01323938 0.01386213 0.01348066 0.02233791 0.02737832
|
|
0.02631879 0.02382731 0.01327252 0.01324177]
|
|
|
|
mean value: 0.01802208423614502
|
|
|
|
key: test_mcc
|
|
value: [0.41281273 0.49517597 0.40952381 0.38154231 0.61758068 0.44786837
|
|
0.51793973 0.14863011 0.64116449 0.40572206]
|
|
|
|
mean value: 0.44779602562273146
|
|
|
|
key: train_mcc
|
|
value: [0.96830875 0.99204533 0.97623798 0.97636634 0.98422269 0.98422269
|
|
0.97624243 0.97624243 0.97624243 0.97624243]
|
|
|
|
mean value: 0.9786373500460502
|
|
|
|
key: test_accuracy
|
|
value: [0.75 0.78125 0.74193548 0.74193548 0.83870968 0.77419355
|
|
0.77419355 0.64516129 0.83870968 0.74193548]
|
|
|
|
mean value: 0.7628024193548387
|
|
|
|
key: train_accuracy
|
|
value: [0.98571429 0.99642857 0.98932384 0.98932384 0.99288256 0.99288256
|
|
0.98932384 0.98932384 0.98932384 0.98932384]
|
|
|
|
mean value: 0.9903851042196238
|
|
|
|
key: test_fscore
|
|
value: [0.82608696 0.85106383 0.80952381 0.81818182 0.88888889 0.85106383
|
|
0.82051282 0.75555556 0.88372093 0.81818182]
|
|
|
|
mean value: 0.8322780257173477
|
|
|
|
key: train_fscore
|
|
value: [0.98930481 0.99730458 0.99191375 0.9919571 0.99462366 0.99462366
|
|
0.992 0.992 0.992 0.992 ]
|
|
|
|
mean value: 0.9927727558060793
|
|
|
|
key: test_precision
|
|
value: [0.76 0.76923077 0.80952381 0.7826087 0.83333333 0.76923077
|
|
0.84210526 0.68 0.82608696 0.75 ]
|
|
|
|
mean value: 0.782211959665049
|
|
|
|
key: train_precision
|
|
value: [0.97883598 0.99462366 0.98924731 0.98404255 0.98930481 0.98930481
|
|
0.98412698 0.98412698 0.98412698 0.98412698]
|
|
|
|
mean value: 0.9861867061945789
|
|
|
|
key: test_recall
|
|
value: [0.9047619 0.95238095 0.80952381 0.85714286 0.95238095 0.95238095
|
|
0.8 0.85 0.95 0.9 ]
|
|
|
|
mean value: 0.8928571428571428
|
|
|
|
key: train_recall
|
|
value: [1. 1. 0.99459459 1. 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9994594594594595
|
|
|
|
key: test_roc_auc
|
|
value: [0.67965368 0.7034632 0.7047619 0.67857143 0.77619048 0.67619048
|
|
0.76363636 0.56136364 0.79318182 0.67727273]
|
|
|
|
mean value: 0.7014285714285714
|
|
|
|
key: train_roc_auc
|
|
value: [0.97894737 0.99473684 0.98688063 0.984375 0.98958333 0.98958333
|
|
0.98421053 0.98421053 0.98421053 0.98421053]
|
|
|
|
mean value: 0.9860948613086771
|
|
|
|
key: test_jcc
|
|
value: [0.7037037 0.74074074 0.68 0.69230769 0.8 0.74074074
|
|
0.69565217 0.60714286 0.79166667 0.69230769]
|
|
|
|
mean value: 0.7144262267523137
|
|
|
|
key: train_jcc
|
|
value: [0.97883598 0.99462366 0.98395722 0.98404255 0.98930481 0.98930481
|
|
0.98412698 0.98412698 0.98412698 0.98412698]
|
|
|
|
mean value: 0.9856576969369168
|
|
|
|
MCC on Blind test: 0.46
|
|
|
|
Accuracy on Blind test: 0.77
|
|
|
|
Model_name: Gradient Boosting
|
|
Model func: GradientBoostingClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GradientBoostingClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.53377724 0.52432537 0.47125101 0.45838308 0.44565415 0.44443941
|
|
0.44144034 0.46182108 0.45414662 0.43897247]
|
|
|
|
mean value: 0.4674210786819458
|
|
|
|
key: score_time
|
|
value: [0.01514387 0.01880264 0.00988698 0.00940108 0.00947022 0.0095377
|
|
0.00945544 0.0099566 0.00943661 0.00944352]
|
|
|
|
mean value: 0.011053466796875
|
|
|
|
key: test_mcc
|
|
value: [1. 1. 1. 0.85238095 1. 0.93048421
|
|
0.93048421 0.93048421 0.85909091 0.72821908]
|
|
|
|
mean value: 0.9231143573921693
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [1. 1. 1. 0.93548387 1. 0.96774194
|
|
0.96774194 0.96774194 0.93548387 0.87096774]
|
|
|
|
mean value: 0.964516129032258
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [1. 1. 1. 0.95238095 1. 0.97560976
|
|
0.97560976 0.97560976 0.95 0.90909091]
|
|
|
|
mean value: 0.9738301129764544
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 1. 1. 0.95238095 1. 1.
|
|
0.95238095 0.95238095 0.95 0.83333333]
|
|
|
|
mean value: 0.964047619047619
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 0.95238095 1. 0.95238095
|
|
1. 1. 0.95 1. ]
|
|
|
|
mean value: 0.9854761904761905
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [1. 1. 1. 0.92619048 1. 0.97619048
|
|
0.95454545 0.95454545 0.92954545 0.81818182]
|
|
|
|
mean value: 0.9559199134199134
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [1. 1. 1. 0.90909091 1. 0.95238095
|
|
0.95238095 0.95238095 0.9047619 0.83333333]
|
|
|
|
mean value: 0.9504329004329004
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.87
|
|
|
|
Accuracy on Blind test: 0.94
|
|
|
|
Model_name: QDA
|
|
Model func: QuadraticDiscriminantAnalysis()
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', QuadraticDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.02589202 0.02847075 0.0404582 0.02659345 0.02673697 0.03226662
|
|
0.03573513 0.02714109 0.05389953 0.03467345]
|
|
|
|
mean value: 0.033186721801757815
|
|
|
|
key: score_time
|
|
value: [0.01283598 0.01288366 0.01362491 0.01272559 0.01264453 0.01271176
|
|
0.01251125 0.01660681 0.01272869 0.01582789]
|
|
|
|
mean value: 0.01351010799407959
|
|
|
|
key: test_mcc
|
|
value: [ 0.21867346 0.0849412 -0.26560636 0.09967105 -0.05976143 0.00752923
|
|
0.01363636 -0.23927198 0.14863011 0.22469871]
|
|
|
|
mean value: 0.0233140353744436
|
|
|
|
key: train_mcc
|
|
value: [0.36577134 0.33200663 0.37383194 0.35226764 0.35226764 0.34110438
|
|
0.34382047 0.39766525 0.32040778 0.35507261]
|
|
|
|
mean value: 0.3534215663451066
|
|
|
|
key: test_accuracy
|
|
value: [0.6875 0.65625 0.5483871 0.67741935 0.61290323 0.64516129
|
|
0.5483871 0.48387097 0.64516129 0.67741935]
|
|
|
|
mean value: 0.6182459677419354
|
|
|
|
key: train_accuracy
|
|
value: [0.725 0.71428571 0.72597865 0.71886121 0.71886121 0.71530249
|
|
0.71886121 0.7366548 0.71174377 0.72241993]
|
|
|
|
mean value: 0.7207968988307066
|
|
|
|
key: test_fscore
|
|
value: [0.8 0.78431373 0.70833333 0.8 0.75 0.7755102
|
|
0.65 0.63636364 0.75555556 0.7826087 ]
|
|
|
|
mean value: 0.7442685150476528
|
|
|
|
key: train_fscore
|
|
value: [0.82774049 0.82222222 0.82774049 0.82405345 0.82405345 0.82222222
|
|
0.8248337 0.83408072 0.82119205 0.82666667]
|
|
|
|
mean value: 0.8254805473034187
|
|
|
|
key: test_precision
|
|
value: [0.68965517 0.66666667 0.62962963 0.68965517 0.66666667 0.67857143
|
|
0.65 0.58333333 0.68 0.69230769]
|
|
|
|
mean value: 0.6626485762003004
|
|
|
|
key: train_precision
|
|
value: [0.70610687 0.69811321 0.70610687 0.70075758 0.70075758 0.69811321
|
|
0.70188679 0.71538462 0.69662921 0.70454545]
|
|
|
|
mean value: 0.7028401382933552
|
|
|
|
key: test_recall
|
|
value: [0.95238095 0.95238095 0.80952381 0.95238095 0.85714286 0.9047619
|
|
0.65 0.7 0.85 0.9 ]
|
|
|
|
mean value: 0.8528571428571429
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.56709957 0.52164502 0.4047619 0.52619048 0.47857143 0.50238095
|
|
0.50681818 0.39545455 0.56136364 0.58636364]
|
|
|
|
mean value: 0.505064935064935
|
|
|
|
key: train_roc_auc
|
|
value: [0.59473684 0.57894737 0.59895833 0.58854167 0.58854167 0.58333333
|
|
0.58421053 0.61052632 0.57368421 0.58947368]
|
|
|
|
mean value: 0.5890953947368421
|
|
|
|
key: test_jcc
|
|
value: [0.66666667 0.64516129 0.5483871 0.66666667 0.6 0.63333333
|
|
0.48148148 0.46666667 0.60714286 0.64285714]
|
|
|
|
mean value: 0.5958363201911588
|
|
|
|
key: train_jcc
|
|
value: [0.70610687 0.69811321 0.70610687 0.70075758 0.70075758 0.69811321
|
|
0.70188679 0.71538462 0.69662921 0.70454545]
|
|
|
|
mean value: 0.7028401382933552
|
|
|
|
MCC on Blind test: 0.19
|
|
|
|
Accuracy on Blind test: 0.68
|
|
|
|
Model_name: Ridge Classifier
|
|
Model func: RidgeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02688026 0.03328443 0.03506446 0.03774476 0.03506637 0.0350914
|
|
0.03563261 0.03496313 0.03501749 0.02813625]
|
|
|
|
mean value: 0.0336881160736084
|
|
|
|
key: score_time
|
|
value: [0.02236223 0.0209496 0.02134967 0.02306938 0.02363062 0.02382088
|
|
0.02356005 0.0199604 0.02020645 0.0209074 ]
|
|
|
|
mean value: 0.02198166847229004
|
|
|
|
key: test_mcc
|
|
value: [1. 0.86147186 0.85238095 0.85465477 1. 0.93048421
|
|
0.78625916 0.49780905 0.85909091 0.64116449]
|
|
|
|
mean value: 0.828331540294447
|
|
|
|
key: train_mcc
|
|
value: [0.95220382 0.94428471 0.95253998 0.95241514 0.95253998 0.94467837
|
|
0.96021134 0.96021134 0.96021134 0.96021134]
|
|
|
|
mean value: 0.9539507359002783
|
|
|
|
key: test_accuracy
|
|
value: [1. 0.9375 0.93548387 0.93548387 1. 0.96774194
|
|
0.90322581 0.77419355 0.93548387 0.83870968]
|
|
|
|
mean value: 0.9227822580645161
|
|
|
|
key: train_accuracy
|
|
value: [0.97857143 0.975 0.97864769 0.97864769 0.97864769 0.97508897
|
|
0.98220641 0.98220641 0.98220641 0.98220641]
|
|
|
|
mean value: 0.979342907981698
|
|
|
|
key: test_fscore
|
|
value: [1. 0.95238095 0.95238095 0.95454545 1. 0.97560976
|
|
0.92682927 0.82926829 0.95 0.88372093]
|
|
|
|
mean value: 0.9424735606613088
|
|
|
|
key: train_fscore
|
|
value: [0.98395722 0.98133333 0.98395722 0.98387097 0.98395722 0.98133333
|
|
0.98666667 0.98666667 0.98666667 0.98666667]
|
|
|
|
mean value: 0.9845075958829279
|
|
|
|
key: test_precision
|
|
value: [1. 0.95238095 0.95238095 0.91304348 1. 1.
|
|
0.9047619 0.80952381 0.95 0.82608696]
|
|
|
|
mean value: 0.9308178053830227
|
|
|
|
key: train_precision
|
|
value: [0.97354497 0.96842105 0.97354497 0.97860963 0.97354497 0.96842105
|
|
0.97883598 0.97883598 0.97883598 0.97883598]
|
|
|
|
mean value: 0.9751430566910443
|
|
|
|
key: test_recall
|
|
value: [1. 0.95238095 0.95238095 1. 1. 0.95238095
|
|
0.95 0.85 0.95 0.95 ]
|
|
|
|
mean value: 0.9557142857142857
|
|
|
|
key: train_recall
|
|
value: [0.99459459 0.99459459 0.99459459 0.98918919 0.99459459 0.99459459
|
|
0.99462366 0.99462366 0.99462366 0.99462366]
|
|
|
|
mean value: 0.9940656785818076
|
|
|
|
key: test_roc_auc
|
|
value: [1. 0.93073593 0.92619048 0.9 1. 0.97619048
|
|
0.88409091 0.74318182 0.92954545 0.79318182]
|
|
|
|
mean value: 0.9083116883116883
|
|
|
|
key: train_roc_auc
|
|
value: [0.97098151 0.96571835 0.97125563 0.97376126 0.97125563 0.9660473
|
|
0.9762592 0.9762592 0.9762592 0.9762592 ]
|
|
|
|
mean value: 0.9724056463084476
|
|
|
|
key: test_jcc
|
|
value: [1. 0.90909091 0.90909091 0.91304348 1. 0.95238095
|
|
0.86363636 0.70833333 0.9047619 0.79166667]
|
|
|
|
mean value: 0.8952004517221909
|
|
|
|
key: train_jcc
|
|
value: [0.96842105 0.96335079 0.96842105 0.96825397 0.96842105 0.96335079
|
|
0.97368421 0.97368421 0.97368421 0.97368421]
|
|
|
|
mean value: 0.9694955538934596
|
|
|
|
MCC on Blind test: 0.82
|
|
|
|
Accuracy on Blind test: 0.92
|
|
|
|
Model_name: Ridge ClassifierCV
|
|
Model func: RidgeClassifierCV(cv=10)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifierCV(cv=10))])
|
|
|
|
key: fit_time
|
|
value: [0.27341199 0.26561522 0.33326602 0.29521275 0.33132577 0.29219532
|
|
0.31099606 0.33863211 0.34531784 0.34489107]
|
|
|
|
mean value: 0.3130864143371582
|
|
|
|
key: score_time
|
|
value: [0.01944685 0.02101326 0.02376032 0.02346349 0.02274227 0.02088618
|
|
0.02771783 0.02203941 0.02380085 0.02210379]
|
|
|
|
mean value: 0.02269742488861084
|
|
|
|
key: test_mcc
|
|
value: [1. 0.86147186 0.85238095 0.85465477 0.78625916 0.78625916
|
|
0.85909091 0.54627358 0.93048421 0.66057826]
|
|
|
|
mean value: 0.8137452858509517
|
|
|
|
key: train_mcc
|
|
value: [0.95220382 0.94428471 0.97623798 0.95241514 0.98417793 0.96831892
|
|
0.97611544 0.97611544 0.98409734 0.98409734]
|
|
|
|
mean value: 0.9698064067154124
|
|
|
|
key: test_accuracy
|
|
value: [1. 0.9375 0.93548387 0.93548387 0.90322581 0.90322581
|
|
0.93548387 0.77419355 0.96774194 0.83870968]
|
|
|
|
mean value: 0.9131048387096774
|
|
|
|
key: train_accuracy
|
|
value: [0.97857143 0.975 0.98932384 0.97864769 0.99288256 0.98576512
|
|
0.98932384 0.98932384 0.99288256 0.99288256]
|
|
|
|
mean value: 0.986460345704118
|
|
|
|
key: test_fscore
|
|
value: [1. 0.95238095 0.95238095 0.95454545 0.92682927 0.92682927
|
|
0.95 0.81081081 0.97560976 0.88888889]
|
|
|
|
mean value: 0.9338275351689985
|
|
|
|
key: train_fscore
|
|
value: /home/tanu/git/LSHTM_analysis/scripts/ml/./katg_7030.py:115: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
baseline_CT.sort_values(by = ['test_mcc'], ascending = False, inplace = True)
|
|
/home/tanu/git/LSHTM_analysis/scripts/ml/./katg_7030.py:118: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
baseline_BT.sort_values(by = ['bts_mcc'], ascending = False, inplace = True)
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[0.98395722 0.98133333 0.99191375 0.98387097 0.99459459 0.98924731
|
|
0.9919571 0.9919571 0.99462366 0.99462366]
|
|
|
|
mean value: 0.9898078694323124
|
|
|
|
key: test_precision
|
|
value: [1. 0.95238095 0.95238095 0.91304348 0.95 0.95
|
|
0.95 0.88235294 0.95238095 0.8 ]
|
|
|
|
mean value: 0.9302539276580197
|
|
|
|
key: train_precision
|
|
value: [0.97354497 0.96842105 0.98924731 0.97860963 0.99459459 0.98395722
|
|
0.98930481 0.98930481 0.99462366 0.99462366]
|
|
|
|
mean value: 0.9856231715015297
|
|
|
|
key: test_recall
|
|
value: [1. 0.95238095 0.95238095 1. 0.9047619 0.9047619
|
|
0.95 0.75 1. 1. ]
|
|
|
|
mean value: 0.9414285714285714
|
|
|
|
key: train_recall
|
|
value: [0.99459459 0.99459459 0.99459459 0.98918919 0.99459459 0.99459459
|
|
0.99462366 0.99462366 0.99462366 0.99462366]
|
|
|
|
mean value: 0.9940656785818076
|
|
|
|
key: test_roc_auc
|
|
value: [1. 0.93073593 0.92619048 0.9 0.90238095 0.90238095
|
|
0.92954545 0.78409091 0.95454545 0.77272727]
|
|
|
|
mean value: 0.9002597402597402
|
|
|
|
key: train_roc_auc
|
|
value: [0.97098151 0.96571835 0.98688063 0.97376126 0.99208896 0.9816723
|
|
0.98678551 0.98678551 0.99204867 0.99204867]
|
|
|
|
mean value: 0.9828771375365178
|
|
|
|
key: test_jcc
|
|
value: [1. 0.90909091 0.90909091 0.91304348 0.86363636 0.86363636
|
|
0.9047619 0.68181818 0.95238095 0.8 ]
|
|
|
|
mean value: 0.8797459062676454
|
|
|
|
key: train_jcc
|
|
value: [0.96842105 0.96335079 0.98395722 0.96825397 0.98924731 0.9787234
|
|
0.98404255 0.98404255 0.98930481 0.98930481]
|
|
|
|
mean value: 0.9798648473611902
|
|
|
|
MCC on Blind test: 0.85
|
|
|
|
Accuracy on Blind test: 0.94
|
|
|
|
Model_name: Logistic Regression
|
|
Model func: LogisticRegression(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegression(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.03805447 0.03569889 0.03429389 0.04928493 0.0660429 0.04097486
|
|
0.10463309 0.07833409 0.0353334 0.04143572]
|
|
|
|
mean value: 0.05240862369537354
|
|
|
|
key: score_time
|
|
value: [0.01335335 0.01258039 0.01363444 0.01591015 0.01217866 0.01498747
|
|
0.01492548 0.01208353 0.01185942 0.01482725]
|
|
|
|
mean value: 0.013634014129638671
|
|
|
|
key: test_mcc
|
|
value: [0.76277007 0.90889326 0.80817439 0.80817439 0.8047619 0.70714286
|
|
0.90238095 0.65952381 0.8047619 0.8047619 ]
|
|
|
|
mean value: 0.7971345447661506
|
|
|
|
key: train_mcc
|
|
value: [0.88154484 0.88712176 0.88680616 0.87612986 0.871086 0.89769524
|
|
0.89238376 0.91925359 0.88220797 0.8869027 ]
|
|
|
|
mean value: 0.8881131867238982
|
|
|
|
key: test_accuracy
|
|
value: [0.88095238 0.95238095 0.90243902 0.90243902 0.90243902 0.85365854
|
|
0.95121951 0.82926829 0.90243902 0.90243902]
|
|
|
|
mean value: 0.8979674796747967
|
|
|
|
key: train_accuracy
|
|
value: [0.94054054 0.94324324 0.94339623 0.93800539 0.93530997 0.94878706
|
|
0.94609164 0.95956873 0.94070081 0.94339623]
|
|
|
|
mean value: 0.943903984847381
|
|
|
|
key: test_fscore
|
|
value: [0.88372093 0.95454545 0.90909091 0.90909091 0.9047619 0.85714286
|
|
0.95 0.82926829 0.9 0.9 ]
|
|
|
|
mean value: 0.8997621257547519
|
|
|
|
key: train_fscore
|
|
value: [0.94148936 0.94429708 0.94339623 0.9383378 0.93617021 0.94906166
|
|
0.94680851 0.96 0.94210526 0.944 ]
|
|
|
|
mean value: 0.944566612071446
|
|
|
|
key: test_precision
|
|
value: [0.86363636 0.91304348 0.86956522 0.86956522 0.9047619 0.85714286
|
|
0.95 0.80952381 0.9 0.9 ]
|
|
|
|
mean value: 0.8837238848108413
|
|
|
|
key: train_precision
|
|
value: [0.92670157 0.92708333 0.94086022 0.93085106 0.92146597 0.94148936
|
|
0.93684211 0.95238095 0.92268041 0.93650794]
|
|
|
|
mean value: 0.9336862919709208
|
|
|
|
key: test_recall
|
|
value: [0.9047619 1. 0.95238095 0.95238095 0.9047619 0.85714286
|
|
0.95 0.85 0.9 0.9 ]
|
|
|
|
mean value: 0.9171428571428571
|
|
|
|
key: train_recall
|
|
value: [0.95675676 0.96216216 0.94594595 0.94594595 0.95135135 0.95675676
|
|
0.95698925 0.96774194 0.96236559 0.9516129 ]
|
|
|
|
mean value: 0.9557628596338275
|
|
|
|
key: test_roc_auc
|
|
value: [0.88095238 0.95238095 0.90119048 0.90119048 0.90238095 0.85357143
|
|
0.95119048 0.8297619 0.90238095 0.90238095]
|
|
|
|
mean value: 0.8977380952380951
|
|
|
|
key: train_roc_auc
|
|
value: [0.94054054 0.94324324 0.94340308 0.93802674 0.9353531 0.94880849
|
|
0.94606219 0.95954664 0.94064226 0.94337402]
|
|
|
|
mean value: 0.9439000290613194
|
|
|
|
key: test_jcc
|
|
value: [0.79166667 0.91304348 0.83333333 0.83333333 0.82608696 0.75
|
|
0.9047619 0.70833333 0.81818182 0.81818182]
|
|
|
|
mean value: 0.8196922642574817
|
|
|
|
key: train_jcc
|
|
value: [0.88944724 0.89447236 0.89285714 0.88383838 0.88 0.90306122
|
|
0.8989899 0.92307692 0.89054726 0.89393939]
|
|
|
|
mean value: 0.8950229828863081
|
|
|
|
MCC on Blind test: 0.78
|
|
|
|
Accuracy on Blind test: 0.9
|
|
|
|
Model_name: Logistic RegressionCV
|
|
Model func: LogisticRegressionCV(random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegressionCV(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.39968538 1.02785754 1.49144053 1.42981839 0.8560338 1.02810216
|
|
0.97955227 0.91374874 1.23398352 1.15036035]
|
|
|
|
mean value: 1.151058268547058
|
|
|
|
key: score_time
|
|
value: [0.0152576 0.01563644 0.01735854 0.01563406 0.01553392 0.01554656
|
|
0.01516581 0.01547575 0.01566935 0.01563621]
|
|
|
|
mean value: 0.015691423416137697
|
|
|
|
key: test_mcc
|
|
value: [0.85811633 0.90889326 0.95238095 0.90238095 1. 0.8047619
|
|
0.95227002 0.70714286 0.95227002 0.85441771]
|
|
|
|
mean value: 0.8892633994360876
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.92857143 0.95238095 0.97560976 0.95121951 1. 0.90243902
|
|
0.97560976 0.85365854 0.97560976 0.92682927]
|
|
|
|
mean value: 0.9441927990708479
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.93023256 0.95454545 0.97560976 0.95238095 1. 0.9047619
|
|
0.97435897 0.85 0.97435897 0.92307692]
|
|
|
|
mean value: 0.9439325497720279
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.90909091 0.91304348 1. 0.95238095 1. 0.9047619
|
|
1. 0.85 1. 0.94736842]
|
|
|
|
mean value: 0.9476645665547268
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.95238095 1. 0.95238095 0.95238095 1. 0.9047619
|
|
0.95 0.85 0.95 0.9 ]
|
|
|
|
mean value: 0.9411904761904761
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.92857143 0.95238095 0.97619048 0.95119048 1. 0.90238095
|
|
0.975 0.85357143 0.975 0.92619048]
|
|
|
|
mean value: 0.944047619047619
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.86956522 0.91304348 0.95238095 0.90909091 1. 0.82608696
|
|
0.95 0.73913043 0.95 0.85714286]
|
|
|
|
mean value: 0.896644080557124
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.77
|
|
|
|
Accuracy on Blind test: 0.9
|
|
|
|
Model_name: Gaussian NB
|
|
Model func: GaussianNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01401162 0.01052999 0.01121998 0.01657534 0.00958681 0.00979447
|
|
0.01485133 0.01169181 0.00982499 0.00981498]
|
|
|
|
mean value: 0.011790132522583008
|
|
|
|
key: score_time
|
|
value: [0.01367188 0.00954771 0.01246524 0.01591039 0.00917268 0.00907016
|
|
0.01428008 0.00912118 0.00907564 0.00906348]
|
|
|
|
mean value: 0.011137843132019043
|
|
|
|
key: test_mcc
|
|
value: [0.57207755 0.53357838 0.66432098 0.46623254 0.86240942 0.61152662
|
|
0.8047619 0.53864117 0.72229808 0.75714286]
|
|
|
|
mean value: 0.6532989494622181
|
|
|
|
key: train_mcc
|
|
value: [0.64703542 0.67504003 0.716066 0.6942959 0.71499964 0.76337838
|
|
0.67722017 0.72381482 0.7197458 0.71480337]
|
|
|
|
mean value: 0.7046399531141702
|
|
|
|
key: test_accuracy
|
|
value: [0.78571429 0.76190476 0.82926829 0.73170732 0.92682927 0.80487805
|
|
0.90243902 0.75609756 0.85365854 0.87804878]
|
|
|
|
mean value: 0.8230545876887341
|
|
|
|
key: train_accuracy
|
|
value: [0.82162162 0.83243243 0.85444744 0.84366577 0.85444744 0.8787062
|
|
0.83557951 0.85983827 0.85714286 0.85444744]
|
|
|
|
mean value: 0.8492328986668609
|
|
|
|
key: test_fscore
|
|
value: [0.7804878 0.7826087 0.84444444 0.75555556 0.93333333 0.81818182
|
|
0.9 0.7826087 0.86363636 0.87804878]
|
|
|
|
mean value: 0.8338905491821716
|
|
|
|
key: train_fscore
|
|
value: [0.83076923 0.84577114 0.86363636 0.85353535 0.86294416 0.88549618
|
|
0.84634761 0.86734694 0.86582278 0.86363636]
|
|
|
|
mean value: 0.8585306132137107
|
|
|
|
key: test_precision
|
|
value: [0.8 0.72 0.79166667 0.70833333 0.875 0.7826087
|
|
0.9 0.69230769 0.79166667 0.85714286]
|
|
|
|
mean value: 0.791872591176939
|
|
|
|
key: train_precision
|
|
value: [0.7902439 0.78341014 0.81042654 0.80094787 0.81339713 0.83653846
|
|
0.79620853 0.82524272 0.81818182 0.81428571]
|
|
|
|
mean value: 0.8088882820715697
|
|
|
|
key: test_recall
|
|
value: [0.76190476 0.85714286 0.9047619 0.80952381 1. 0.85714286
|
|
0.9 0.9 0.95 0.9 ]
|
|
|
|
mean value: 0.8840476190476191
|
|
|
|
key: train_recall
|
|
value: [0.87567568 0.91891892 0.92432432 0.91351351 0.91891892 0.94054054
|
|
0.90322581 0.91397849 0.91935484 0.91935484]
|
|
|
|
mean value: 0.9147805870386516
|
|
|
|
key: test_roc_auc
|
|
value: [0.78571429 0.76190476 0.82738095 0.7297619 0.925 0.80357143
|
|
0.90238095 0.75952381 0.85595238 0.87857143]
|
|
|
|
mean value: 0.8229761904761904
|
|
|
|
key: train_roc_auc
|
|
value: [0.82162162 0.83243243 0.85463528 0.84385353 0.85462075 0.87887242
|
|
0.83539669 0.85969195 0.85697472 0.85427201]
|
|
|
|
mean value: 0.8492371403661726
|
|
|
|
key: test_jcc
|
|
value: [0.64 0.64285714 0.73076923 0.60714286 0.875 0.69230769
|
|
0.81818182 0.64285714 0.76 0.7826087 ]
|
|
|
|
mean value: 0.7191724579768058
|
|
|
|
key: train_jcc
|
|
value: [0.71052632 0.73275862 0.76 0.74449339 0.75892857 0.79452055
|
|
0.73362445 0.76576577 0.76339286 0.76 ]
|
|
|
|
mean value: 0.7524010524980485
|
|
|
|
MCC on Blind test: 0.55
|
|
|
|
Accuracy on Blind test: 0.8
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01205778 0.01103449 0.01065707 0.01740766 0.01130915 0.01000571
|
|
0.01102018 0.01014924 0.01479101 0.01021481]
|
|
|
|
mean value: 0.011864709854125976
|
|
|
|
key: score_time
|
|
value: [0.01010084 0.00973463 0.00967646 0.01493192 0.00972748 0.00886583
|
|
0.00895524 0.01499701 0.00933695 0.00921488]
|
|
|
|
mean value: 0.010554122924804687
|
|
|
|
key: test_mcc
|
|
value: [0.57735027 0.57735027 0.75714286 0.67700771 0.7633652 0.65871309
|
|
0.7633652 0.71121921 0.70714286 0.90238095]
|
|
|
|
mean value: 0.7095037610692975
|
|
|
|
key: train_mcc
|
|
value: [0.75417724 0.74123391 0.71558817 0.74235478 0.74235478 0.74235478
|
|
0.72167661 0.72031226 0.73194294 0.73194294]
|
|
|
|
mean value: 0.7343938376347271
|
|
|
|
key: test_accuracy
|
|
value: [0.78571429 0.78571429 0.87804878 0.82926829 0.87804878 0.82926829
|
|
0.87804878 0.85365854 0.85365854 0.95121951]
|
|
|
|
mean value: 0.8522648083623693
|
|
|
|
key: train_accuracy
|
|
value: [0.87567568 0.87027027 0.85714286 0.87061995 0.87061995 0.87061995
|
|
0.85983827 0.85983827 0.86522911 0.86522911]
|
|
|
|
mean value: 0.8665083412253224
|
|
|
|
key: test_fscore
|
|
value: [0.8 0.8 0.87804878 0.85106383 0.88888889 0.8372093
|
|
0.86486486 0.85714286 0.85 0.95 ]
|
|
|
|
mean value: 0.8577218523497231
|
|
|
|
key: train_fscore
|
|
value: [0.88082902 0.87301587 0.86089239 0.87368421 0.87368421 0.87368421
|
|
0.86528497 0.86315789 0.86979167 0.86979167]
|
|
|
|
mean value: 0.8703816110753745
|
|
|
|
key: test_precision
|
|
value: [0.75 0.75 0.9 0.76923077 0.83333333 0.81818182
|
|
0.94117647 0.81818182 0.85 0.95 ]
|
|
|
|
mean value: 0.8380104209515974
|
|
|
|
key: train_precision
|
|
value: [0.84577114 0.85492228 0.83673469 0.85128205 0.85128205 0.85128205
|
|
0.835 0.84536082 0.84343434 0.84343434]
|
|
|
|
mean value: 0.8458503783406013
|
|
|
|
key: test_recall
|
|
value: [0.85714286 0.85714286 0.85714286 0.95238095 0.95238095 0.85714286
|
|
0.8 0.9 0.85 0.95 ]
|
|
|
|
mean value: 0.8833333333333333
|
|
|
|
key: train_recall
|
|
value: [0.91891892 0.89189189 0.88648649 0.8972973 0.8972973 0.8972973
|
|
0.89784946 0.88172043 0.89784946 0.89784946]
|
|
|
|
mean value: 0.8964458006393491
|
|
|
|
key: test_roc_auc
|
|
value: [0.78571429 0.78571429 0.87857143 0.82619048 0.87619048 0.82857143
|
|
0.87619048 0.8547619 0.85357143 0.95119048]
|
|
|
|
mean value: 0.8516666666666667
|
|
|
|
key: train_roc_auc
|
|
value: [0.87567568 0.87027027 0.85722174 0.87069166 0.87069166 0.87069166
|
|
0.85973554 0.85977913 0.86514095 0.86514095]
|
|
|
|
mean value: 0.8665039232781169
|
|
|
|
key: test_jcc
|
|
value: [0.66666667 0.66666667 0.7826087 0.74074074 0.8 0.72
|
|
0.76190476 0.75 0.73913043 0.9047619 ]
|
|
|
|
mean value: 0.7532479871175524
|
|
|
|
key: train_jcc
|
|
value: [0.78703704 0.77464789 0.75576037 0.77570093 0.77570093 0.77570093
|
|
0.76255708 0.75925926 0.76958525 0.76958525]
|
|
|
|
mean value: 0.7705534940560166
|
|
|
|
MCC on Blind test: 0.59
|
|
|
|
Accuracy on Blind test: 0.82
|
|
|
|
Model_name: K-Nearest Neighbors
|
|
Model func: KNeighborsClassifier()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', KNeighborsClassifier())])
|
|
|
|
key: fit_time
|
|
value: [0.01108479 0.0093708 0.01117444 0.01171565 0.01109958 0.0105772
|
|
0.01115322 0.01098776 0.00986218 0.01298475]
|
|
|
|
mean value: 0.011001038551330566
|
|
|
|
key: score_time
|
|
value: [0.03188109 0.01672149 0.02224278 0.01918387 0.01870203 0.01756144
|
|
0.01790571 0.01775837 0.01715755 0.02098441]
|
|
|
|
mean value: 0.020009875297546387
|
|
|
|
key: test_mcc
|
|
value: [0.43052839 0.62187434 0.56086079 0.51190476 0.71121921 0.65871309
|
|
0.61152662 0.56086079 0.60952381 0.51320273]
|
|
|
|
mean value: 0.5790214513441565
|
|
|
|
key: train_mcc
|
|
value: [0.76327807 0.80010521 0.73634484 0.75792591 0.75307912 0.74160356
|
|
0.75239189 0.74565731 0.74718674 0.75274878]
|
|
|
|
mean value: 0.7550321433011067
|
|
|
|
key: test_accuracy
|
|
value: [0.71428571 0.80952381 0.7804878 0.75609756 0.85365854 0.82926829
|
|
0.80487805 0.7804878 0.80487805 0.75609756]
|
|
|
|
mean value: 0.7889663182346109
|
|
|
|
key: train_accuracy
|
|
value: [0.88108108 0.9 0.86792453 0.8787062 0.87601078 0.87061995
|
|
0.87601078 0.87061995 0.87331536 0.87601078]
|
|
|
|
mean value: 0.8770299409922051
|
|
|
|
key: test_fscore
|
|
value: [0.7 0.8 0.79069767 0.76190476 0.85 0.8372093
|
|
0.78947368 0.76923077 0.8 0.73684211]
|
|
|
|
mean value: 0.7835358297353401
|
|
|
|
key: train_fscore
|
|
value: [0.87777778 0.89918256 0.86501377 0.87603306 0.87222222 0.86813187
|
|
0.87830688 0.86363636 0.87123288 0.87362637]
|
|
|
|
mean value: 0.8745163753677637
|
|
|
|
key: test_precision
|
|
value: [0.73684211 0.84210526 0.77272727 0.76190476 0.89473684 0.81818182
|
|
0.83333333 0.78947368 0.8 0.77777778]
|
|
|
|
mean value: 0.8027082858661806
|
|
|
|
key: train_precision
|
|
value: [0.90285714 0.90659341 0.88202247 0.89325843 0.89714286 0.88268156
|
|
0.86458333 0.91566265 0.88826816 0.89325843]
|
|
|
|
mean value: 0.8926328437042237
|
|
|
|
key: test_recall
|
|
value: [0.66666667 0.76190476 0.80952381 0.76190476 0.80952381 0.85714286
|
|
0.75 0.75 0.8 0.7 ]
|
|
|
|
mean value: 0.7666666666666666
|
|
|
|
key: train_recall
|
|
value: [0.85405405 0.89189189 0.84864865 0.85945946 0.84864865 0.85405405
|
|
0.89247312 0.8172043 0.85483871 0.85483871]
|
|
|
|
mean value: 0.8576111595466435
|
|
|
|
key: test_roc_auc
|
|
value: [0.71428571 0.80952381 0.7797619 0.75595238 0.8547619 0.82857143
|
|
0.80357143 0.7797619 0.8047619 0.7547619 ]
|
|
|
|
mean value: 0.7885714285714286
|
|
|
|
key: train_roc_auc
|
|
value: [0.88108108 0.9 0.86787271 0.87865446 0.87593723 0.87057541
|
|
0.87596629 0.87076431 0.8733653 0.876068 ]
|
|
|
|
mean value: 0.8770284800929963
|
|
|
|
key: test_jcc
|
|
value: [0.53846154 0.66666667 0.65384615 0.61538462 0.73913043 0.72
|
|
0.65217391 0.625 0.66666667 0.58333333]
|
|
|
|
mean value: 0.6460663322185061
|
|
|
|
key: train_jcc
|
|
value: [0.78217822 0.81683168 0.76213592 0.77941176 0.77339901 0.76699029
|
|
0.78301887 0.76 0.77184466 0.77560976]
|
|
|
|
mean value: 0.7771420178282804
|
|
|
|
MCC on Blind test: 0.41
|
|
|
|
Accuracy on Blind test: 0.72
|
|
|
|
Model_name: SVM
|
|
Model func: SVC(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SVC(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01698852 0.01849985 0.01615834 0.01625133 0.01616049 0.0162015
|
|
0.01634336 0.01601434 0.01653647 0.01660895]
|
|
|
|
mean value: 0.016576313972473146
|
|
|
|
key: score_time
|
|
value: [0.01195359 0.01049948 0.01058865 0.01097417 0.01060414 0.01036382
|
|
0.01065779 0.01078796 0.01075959 0.0114882 ]
|
|
|
|
mean value: 0.010867738723754882
|
|
|
|
key: test_mcc
|
|
value: [0.58834841 0.78446454 0.81975606 0.65915306 0.57570364 0.65871309
|
|
0.90692382 0.76500781 0.8047619 0.8547619 ]
|
|
|
|
mean value: 0.7417594238667976
|
|
|
|
key: train_mcc
|
|
value: [0.76980251 0.7855844 0.79896877 0.76934606 0.7608309 0.79108463
|
|
0.76918835 0.81470293 0.78607065 0.7751856 ]
|
|
|
|
mean value: 0.7820764804941465
|
|
|
|
key: test_accuracy
|
|
value: [0.78571429 0.88095238 0.90243902 0.80487805 0.7804878 0.82926829
|
|
0.95121951 0.87804878 0.90243902 0.92682927]
|
|
|
|
mean value: 0.8642276422764228
|
|
|
|
key: train_accuracy
|
|
value: [0.88108108 0.88918919 0.89757412 0.88140162 0.87601078 0.89218329
|
|
0.88140162 0.90566038 0.88948787 0.88409704]
|
|
|
|
mean value: 0.8878086981860567
|
|
|
|
key: test_fscore
|
|
value: [0.80851064 0.89361702 0.91304348 0.84 0.80851064 0.8372093
|
|
0.95238095 0.88372093 0.9 0.92682927]
|
|
|
|
mean value: 0.8763822229364985
|
|
|
|
key: train_fscore
|
|
value: [0.88888889 0.89620253 0.90206186 0.88832487 0.88442211 0.89847716
|
|
0.88888889 0.91002571 0.89672544 0.89168766]
|
|
|
|
mean value: 0.8945705111280717
|
|
|
|
key: test_precision
|
|
value: [0.73076923 0.80769231 0.84 0.72413793 0.73076923 0.81818182
|
|
0.90909091 0.82608696 0.9 0.9047619 ]
|
|
|
|
mean value: 0.8191490288821623
|
|
|
|
key: train_precision
|
|
value: [0.83412322 0.84285714 0.86206897 0.83732057 0.82629108 0.84688995
|
|
0.83809524 0.87192118 0.8436019 0.83886256]
|
|
|
|
mean value: 0.8442031812588747
|
|
|
|
key: test_recall
|
|
value: [0.9047619 1. 1. 1. 0.9047619 0.85714286
|
|
1. 0.95 0.9 0.95 ]
|
|
|
|
mean value: 0.9466666666666667
|
|
|
|
key: train_recall
|
|
value: [0.95135135 0.95675676 0.94594595 0.94594595 0.95135135 0.95675676
|
|
0.94623656 0.9516129 0.95698925 0.9516129 ]
|
|
|
|
mean value: 0.9514559721011334
|
|
|
|
key: test_roc_auc
|
|
value: [0.78571429 0.88095238 0.9 0.8 0.77738095 0.82857143
|
|
0.95238095 0.8797619 0.90238095 0.92738095]
|
|
|
|
mean value: 0.863452380952381
|
|
|
|
key: train_roc_auc
|
|
value: [0.88108108 0.88918919 0.89770416 0.88157512 0.87621331 0.89235687
|
|
0.88122639 0.90553618 0.88930543 0.88391456]
|
|
|
|
mean value: 0.8878102295844231
|
|
|
|
key: test_jcc
|
|
value: [0.67857143 0.80769231 0.84 0.72413793 0.67857143 0.72
|
|
0.90909091 0.79166667 0.81818182 0.86363636]
|
|
|
|
mean value: 0.7831548853445405
|
|
|
|
key: train_jcc
|
|
value: [0.8 0.81192661 0.82159624 0.79908676 0.79279279 0.8156682
|
|
0.8 0.83490566 0.81278539 0.80454545]
|
|
|
|
mean value: 0.8093307106235347
|
|
|
|
MCC on Blind test: 0.68
|
|
|
|
Accuracy on Blind test: 0.86
|
|
|
|
Model_name: MLP
|
|
Model func: MLPClassifier(max_iter=500, random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MLPClassifier(max_iter=500, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.47419095 1.40329194 1.42739582 1.49662685 1.34318018 1.49213672
|
|
1.31603861 1.42225814 1.39964223 1.34260798]
|
|
|
|
mean value: 1.4117369413375855
|
|
|
|
key: score_time
|
|
value: [0.01513195 0.02292132 0.0230453 0.01247382 0.01239777 0.0147655
|
|
0.01543188 0.01502585 0.01510143 0.01240325]
|
|
|
|
mean value: 0.015869808197021485
|
|
|
|
key: test_mcc
|
|
value: [0.76277007 0.95346259 0.90692382 0.86240942 0.95238095 0.70714286
|
|
0.90649828 0.51320273 0.85441771 0.8047619 ]
|
|
|
|
mean value: 0.8223970331207965
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.88095238 0.97619048 0.95121951 0.92682927 0.97560976 0.85365854
|
|
0.95121951 0.75609756 0.92682927 0.90243902]
|
|
|
|
mean value: 0.9101045296167247
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.88372093 0.97674419 0.95 0.93333333 0.97560976 0.85714286
|
|
0.94736842 0.73684211 0.92307692 0.9 ]
|
|
|
|
mean value: 0.9083838512245533
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.86363636 0.95454545 1. 0.875 1. 0.85714286
|
|
1. 0.77777778 0.94736842 0.9 ]
|
|
|
|
mean value: 0.9175470874155085
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.9047619 1. 0.9047619 1. 0.95238095 0.85714286
|
|
0.9 0.7 0.9 0.9 ]
|
|
|
|
mean value: 0.9019047619047619
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.88095238 0.97619048 0.95238095 0.925 0.97619048 0.85357143
|
|
0.95 0.7547619 0.92619048 0.90238095]
|
|
|
|
mean value: 0.9097619047619048
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.79166667 0.95454545 0.9047619 0.875 0.95238095 0.75
|
|
0.9 0.58333333 0.85714286 0.81818182]
|
|
|
|
mean value: 0.8387012987012987
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.68
|
|
|
|
Accuracy on Blind test: 0.85
|
|
|
|
Model_name: Decision Tree
|
|
Model func: DecisionTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', DecisionTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.03340149 0.01821494 0.01930451 0.02136612 0.01852131 0.01884985
|
|
0.02576661 0.01696181 0.01858449 0.01900506]
|
|
|
|
mean value: 0.02099761962890625
|
|
|
|
key: score_time
|
|
value: [0.01251078 0.00937796 0.01435971 0.00949097 0.00899172 0.00927854
|
|
0.01029158 0.01122713 0.00910568 0.00921607]
|
|
|
|
mean value: 0.01038501262664795
|
|
|
|
key: test_mcc
|
|
value: [0.71754731 0.85811633 1. 0.86240942 0.90692382 0.8547619
|
|
0.86240942 0.86333169 0.85441771 0.90238095]
|
|
|
|
mean value: 0.8682298554829512
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.85714286 0.92857143 1. 0.92682927 0.95121951 0.92682927
|
|
0.92682927 0.92682927 0.92682927 0.95121951]
|
|
|
|
mean value: 0.9322299651567945
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.85 0.92682927 1. 0.93333333 0.95 0.92682927
|
|
0.91891892 0.93023256 0.92307692 0.95 ]
|
|
|
|
mean value: 0.9309220270054076
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.89473684 0.95 1. 0.875 1. 0.95
|
|
1. 0.86956522 0.94736842 0.95 ]
|
|
|
|
mean value: 0.9436670480549199
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.80952381 0.9047619 1. 1. 0.9047619 0.9047619
|
|
0.85 1. 0.9 0.95 ]
|
|
|
|
mean value: 0.9223809523809524
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.85714286 0.92857143 1. 0.925 0.95238095 0.92738095
|
|
0.925 0.92857143 0.92619048 0.95119048]
|
|
|
|
mean value: 0.9321428571428572
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.73913043 0.86363636 1. 0.875 0.9047619 0.86363636
|
|
0.85 0.86956522 0.85714286 0.9047619 ]
|
|
|
|
mean value: 0.8727635046113307
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.9
|
|
|
|
Accuracy on Blind test: 0.95
|
|
|
|
Model_name: Extra Trees
|
|
Model func: ExtraTreesClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreesClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.12875319 0.12269926 0.12715673 0.13435197 0.13244057 0.13502121
|
|
0.12934875 0.12360287 0.13284302 0.13551545]
|
|
|
|
mean value: 0.13017330169677735
|
|
|
|
key: score_time
|
|
value: [0.01931357 0.01789641 0.01895833 0.02062035 0.02520704 0.02190733
|
|
0.01751018 0.01830721 0.02011347 0.01819706]
|
|
|
|
mean value: 0.0198030948638916
|
|
|
|
key: test_mcc
|
|
value: [0.64597519 0.8660254 0.80817439 0.80817439 0.8547619 0.80907152
|
|
1. 0.71121921 0.80817439 0.95238095]
|
|
|
|
mean value: 0.8263957356100416
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.80952381 0.92857143 0.90243902 0.90243902 0.92682927 0.90243902
|
|
1. 0.85365854 0.90243902 0.97560976]
|
|
|
|
mean value: 0.9103948896631824
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.83333333 0.93333333 0.90909091 0.90909091 0.92682927 0.9
|
|
1. 0.85714286 0.89473684 0.97560976]
|
|
|
|
mean value: 0.9139167208486849
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.74074074 0.875 0.86956522 0.86956522 0.95 0.94736842
|
|
1. 0.81818182 0.94444444 0.95238095]
|
|
|
|
mean value: 0.8967246811583196
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.95238095 1. 0.95238095 0.95238095 0.9047619 0.85714286
|
|
1. 0.9 0.85 1. ]
|
|
|
|
mean value: 0.9369047619047619
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.80952381 0.92857143 0.90119048 0.90119048 0.92738095 0.90357143
|
|
1. 0.8547619 0.90119048 0.97619048]
|
|
|
|
mean value: 0.9103571428571429
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.71428571 0.875 0.83333333 0.83333333 0.86363636 0.81818182
|
|
1. 0.75 0.80952381 0.95238095]
|
|
|
|
mean value: 0.8449675324675325
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.71
|
|
|
|
Accuracy on Blind test: 0.87
|
|
|
|
Model_name: Extra Tree
|
|
Model func: ExtraTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.00980496 0.00954533 0.00995851 0.01084757 0.0097611 0.00990272
|
|
0.00961041 0.00956297 0.01108313 0.01018977]
|
|
|
|
mean value: 0.010026645660400391
|
|
|
|
key: score_time
|
|
value: [0.00882721 0.00858307 0.00972176 0.00881481 0.00876117 0.00892162
|
|
0.0086 0.00859857 0.00930023 0.00956869]
|
|
|
|
mean value: 0.008969712257385253
|
|
|
|
key: test_mcc
|
|
value: [0.47673129 0.49029034 0.51190476 0.56190476 0.6133669 0.6133669
|
|
0.66432098 0.46428571 0.51966679 0.46428571]
|
|
|
|
mean value: 0.5380124156067321
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.73809524 0.73809524 0.75609756 0.7804878 0.80487805 0.80487805
|
|
0.82926829 0.73170732 0.75609756 0.73170732]
|
|
|
|
mean value: 0.7671312427409989
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.73170732 0.76595745 0.76190476 0.7804878 0.8 0.8
|
|
0.81081081 0.73170732 0.72222222 0.73170732]
|
|
|
|
mean value: 0.7636504997843866
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.75 0.69230769 0.76190476 0.8 0.84210526 0.84210526
|
|
0.88235294 0.71428571 0.8125 0.71428571]
|
|
|
|
mean value: 0.7811847350276143
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.71428571 0.85714286 0.76190476 0.76190476 0.76190476 0.76190476
|
|
0.75 0.75 0.65 0.75 ]
|
|
|
|
mean value: 0.7519047619047619
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.73809524 0.73809524 0.75595238 0.78095238 0.80595238 0.80595238
|
|
0.82738095 0.73214286 0.75357143 0.73214286]
|
|
|
|
mean value: 0.7670238095238096
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.57692308 0.62068966 0.61538462 0.64 0.66666667 0.66666667
|
|
0.68181818 0.57692308 0.56521739 0.57692308]
|
|
|
|
mean value: 0.6187212407782122
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.36
|
|
|
|
Accuracy on Blind test: 0.7
|
|
|
|
Model_name: Random Forest
|
|
Model func: RandomForestClassifier(n_estimators=1000, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(n_estimators=1000, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.62729216 1.63516164 1.67529535 1.64418316 1.65478873 1.56067038
|
|
1.59187913 1.81973457 1.68453097 1.64458179]
|
|
|
|
mean value: 1.65381178855896
|
|
|
|
key: score_time
|
|
value: [0.09210157 0.09104776 0.0897491 0.08998299 0.09835529 0.09500146
|
|
0.10104084 0.10681438 0.09972382 0.09916043]
|
|
|
|
mean value: 0.0962977647781372
|
|
|
|
key: test_mcc
|
|
value: [0.90889326 0.90889326 0.85441771 0.86240942 1. 0.90238095
|
|
0.95227002 0.8547619 0.95238095 0.95238095]
|
|
|
|
mean value: 0.9148788418773526
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.95238095 0.95238095 0.92682927 0.92682927 1. 0.95121951
|
|
0.97560976 0.92682927 0.97560976 0.97560976]
|
|
|
|
mean value: 0.9563298490127758
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.95454545 0.95454545 0.93023256 0.93333333 1. 0.95238095
|
|
0.97435897 0.92682927 0.97560976 0.97560976]
|
|
|
|
mean value: 0.9577445507791509
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.91304348 0.91304348 0.90909091 0.875 1. 0.95238095
|
|
1. 0.9047619 0.95238095 0.95238095]
|
|
|
|
mean value: 0.937208262751741
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 0.95238095 1. 1. 0.95238095
|
|
0.95 0.95 1. 1. ]
|
|
|
|
mean value: 0.9804761904761905
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.95238095 0.95238095 0.92619048 0.925 1. 0.95119048
|
|
0.975 0.92738095 0.97619048 0.97619048]
|
|
|
|
mean value: 0.9561904761904761
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.91304348 0.91304348 0.86956522 0.875 1. 0.90909091
|
|
0.95 0.86363636 0.95238095 0.95238095]
|
|
|
|
mean value: 0.919814135140222
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.88
|
|
|
|
Accuracy on Blind test: 0.95
|
|
|
|
Model_name: Random Forest2
|
|
Model func: RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000...05', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10,
|
|
oob_score=True, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.991997 0.92924547 0.92070866 0.94155884 0.96088862 0.94280195
|
|
0.9203403 0.98444152 1.03350067 1.02090788]
|
|
|
|
mean value: 0.9646390914916992
|
|
|
|
key: score_time
|
|
value: [0.24833751 0.24513054 0.12675023 0.24418807 0.22218466 0.22113061
|
|
0.27262068 0.13829207 0.15273929 0.18978667]
|
|
|
|
mean value: 0.20611603260040284
|
|
|
|
key: test_mcc
|
|
value: [0.95346259 0.90889326 0.85441771 0.86240942 0.95227002 0.90238095
|
|
1. 0.76500781 0.90238095 0.95238095]
|
|
|
|
mean value: 0.9053603651273313
|
|
|
|
key: train_mcc
|
|
value: [0.96779381 0.96779381 0.978494 0.97317407 0.978494 0.9734012
|
|
0.97317174 0.97849275 0.96787795 0.97849275]
|
|
|
|
mean value: 0.973718608698558
|
|
|
|
key: test_accuracy
|
|
value: [0.97619048 0.95238095 0.92682927 0.92682927 0.97560976 0.95121951
|
|
1. 0.87804878 0.95121951 0.97560976]
|
|
|
|
mean value: 0.9513937282229965
|
|
|
|
key: train_accuracy
|
|
value: [0.98378378 0.98378378 0.98921833 0.98652291 0.98921833 0.98652291
|
|
0.98652291 0.98921833 0.98382749 0.98921833]
|
|
|
|
mean value: 0.9867837109346543
|
|
|
|
key: test_fscore
|
|
value: [0.97674419 0.95454545 0.93023256 0.93333333 0.97674419 0.95238095
|
|
1. 0.88372093 0.95 0.97560976]
|
|
|
|
mean value: 0.9533311356822418
|
|
|
|
key: train_fscore
|
|
value: [0.98395722 0.98395722 0.98924731 0.98659517 0.98924731 0.98666667
|
|
0.98666667 0.98930481 0.98404255 0.98930481]
|
|
|
|
mean value: 0.9868989748614594
|
|
|
|
key: test_precision
|
|
value: [0.95454545 0.91304348 0.90909091 0.875 0.95454545 0.95238095
|
|
1. 0.82608696 0.95 0.95238095]
|
|
|
|
mean value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
0.9287074157726332
|
|
|
|
key: train_precision
|
|
value: [0.97354497 0.97354497 0.98395722 0.9787234 0.98395722 0.97368421
|
|
0.97883598 0.98404255 0.97368421 0.98404255]
|
|
|
|
mean value: 0.9788017296119529
|
|
|
|
key: test_recall
|
|
value: [1. 1. 0.95238095 1. 1. 0.95238095
|
|
1. 0.95 0.95 1. ]
|
|
|
|
mean value: 0.9804761904761905
|
|
|
|
key: train_recall
|
|
value: [0.99459459 0.99459459 0.99459459 0.99459459 0.99459459 1.
|
|
0.99462366 0.99462366 0.99462366 0.99462366]
|
|
|
|
mean value: 0.9951467596628887
|
|
|
|
key: test_roc_auc
|
|
value: [0.97619048 0.95238095 0.92619048 0.925 0.975 0.95119048
|
|
1. 0.8797619 0.95119048 0.97619048]
|
|
|
|
mean value: 0.9513095238095238
|
|
|
|
key: train_roc_auc
|
|
value: [0.98378378 0.98378378 0.98923278 0.98654461 0.98923278 0.98655914
|
|
0.98650102 0.98920372 0.98379831 0.98920372]
|
|
|
|
mean value: 0.9867843650101715
|
|
|
|
key: test_jcc
|
|
value: [0.95454545 0.91304348 0.86956522 0.875 0.95454545 0.90909091
|
|
1. 0.79166667 0.9047619 0.95238095]
|
|
|
|
mean value: 0.9124600037643515
|
|
|
|
key: train_jcc
|
|
value: [0.96842105 0.96842105 0.9787234 0.97354497 0.9787234 0.97368421
|
|
0.97368421 0.97883598 0.96858639 0.97883598]
|
|
|
|
mean value: 0.9741460653477914
|
|
|
|
MCC on Blind test: 0.9
|
|
|
|
Accuracy on Blind test: 0.95
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.02595615 0.00980139 0.010777 0.00991344 0.01007867 0.01106524
|
|
0.01026869 0.010144 0.01002526 0.01101446]
|
|
|
|
mean value: 0.011904430389404298
|
|
|
|
key: score_time
|
|
value: [0.01006055 0.00894856 0.01005125 0.0089395 0.00916576 0.00964999
|
|
0.0100596 0.00945497 0.00976562 0.0090816 ]
|
|
|
|
mean value: 0.009517741203308106
|
|
|
|
key: test_mcc
|
|
value: [0.57735027 0.57735027 0.75714286 0.67700771 0.7633652 0.65871309
|
|
0.7633652 0.71121921 0.70714286 0.90238095]
|
|
|
|
mean value: 0.7095037610692975
|
|
|
|
key: train_mcc
|
|
value: [0.75417724 0.74123391 0.71558817 0.74235478 0.74235478 0.74235478
|
|
0.72167661 0.72031226 0.73194294 0.73194294]
|
|
|
|
mean value: 0.7343938376347271
|
|
|
|
key: test_accuracy
|
|
value: [0.78571429 0.78571429 0.87804878 0.82926829 0.87804878 0.82926829
|
|
0.87804878 0.85365854 0.85365854 0.95121951]
|
|
|
|
mean value: 0.8522648083623693
|
|
|
|
key: train_accuracy
|
|
value: [0.87567568 0.87027027 0.85714286 0.87061995 0.87061995 0.87061995
|
|
0.85983827 0.85983827 0.86522911 0.86522911]
|
|
|
|
mean value: 0.8665083412253224
|
|
|
|
key: test_fscore
|
|
value: [0.8 0.8 0.87804878 0.85106383 0.88888889 0.8372093
|
|
0.86486486 0.85714286 0.85 0.95 ]
|
|
|
|
mean value: 0.8577218523497231
|
|
|
|
key: train_fscore
|
|
value: [0.88082902 0.87301587 0.86089239 0.87368421 0.87368421 0.87368421
|
|
0.86528497 0.86315789 0.86979167 0.86979167]
|
|
|
|
mean value: 0.8703816110753745
|
|
|
|
key: test_precision
|
|
value: [0.75 0.75 0.9 0.76923077 0.83333333 0.81818182
|
|
0.94117647 0.81818182 0.85 0.95 ]
|
|
|
|
mean value: 0.8380104209515974
|
|
|
|
key: train_precision
|
|
value: [0.84577114 0.85492228 0.83673469 0.85128205 0.85128205 0.85128205
|
|
0.835 0.84536082 0.84343434 0.84343434]
|
|
|
|
mean value: 0.8458503783406013
|
|
|
|
key: test_recall
|
|
value: [0.85714286 0.85714286 0.85714286 0.95238095 0.95238095 0.85714286
|
|
0.8 0.9 0.85 0.95 ]
|
|
|
|
mean value: 0.8833333333333333
|
|
|
|
key: train_recall
|
|
value: [0.91891892 0.89189189 0.88648649 0.8972973 0.8972973 0.8972973
|
|
0.89784946 0.88172043 0.89784946 0.89784946]
|
|
|
|
mean value: 0.8964458006393491
|
|
|
|
key: test_roc_auc
|
|
value: [0.78571429 0.78571429 0.87857143 0.82619048 0.87619048 0.82857143
|
|
0.87619048 0.8547619 0.85357143 0.95119048]
|
|
|
|
mean value: 0.8516666666666667
|
|
|
|
key: train_roc_auc
|
|
value: [0.87567568 0.87027027 0.85722174 0.87069166 0.87069166 0.87069166
|
|
0.85973554 0.85977913 0.86514095 0.86514095]
|
|
|
|
mean value: 0.8665039232781169
|
|
|
|
key: test_jcc
|
|
value: [0.66666667 0.66666667 0.7826087 0.74074074 0.8 0.72
|
|
0.76190476 0.75 0.73913043 0.9047619 ]
|
|
|
|
mean value: 0.7532479871175524
|
|
|
|
key: train_jcc
|
|
value: [0.78703704 0.77464789 0.75576037 0.77570093 0.77570093 0.77570093
|
|
0.76255708 0.75925926 0.76958525 0.76958525]
|
|
|
|
mean value: 0.7705534940560166
|
|
|
|
MCC on Blind test: 0.59
|
|
|
|
Accuracy on Blind test: 0.82
|
|
|
|
Model_name: XGBoost
|
|
Model func: XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000...
|
|
interaction_constraints=None, learning_rate=None,
|
|
max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan,
|
|
monotone_constraints=None, n_estimators=100,
|
|
n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None,
|
|
reg_lambda=None, scale_pos_weight=None,
|
|
subsample=None, tree_method=None,
|
|
use_label_encoder=False,
|
|
validate_parameters=None, verbosity=0))])
|
|
|
|
key: fit_time
|
|
value: [0.12928247 0.29347372 0.07189345 0.06359673 0.07391357 0.07862544
|
|
0.0591352 0.05616426 0.06642604 0.08556604]
|
|
|
|
mean value: 0.09780769348144532
|
|
|
|
key: score_time
|
|
value: [0.01163054 0.01359677 0.01183581 0.01162839 0.01110625 0.01166606
|
|
0.01094699 0.01067758 0.01141667 0.01115537]
|
|
|
|
mean value: 0.01156604290008545
|
|
|
|
key: test_mcc
|
|
value: [1. 0.95346259 1. 0.80817439 0.95238095 0.95238095
|
|
0.90649828 0.86333169 0.90238095 0.95238095]
|
|
|
|
mean value: 0.9290990764599092
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [1. 0.97619048 1. 0.90243902 0.97560976 0.97560976
|
|
0.95121951 0.92682927 0.95121951 0.97560976]
|
|
|
|
mean value: 0.963472706155633
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [1. 0.97674419 1. 0.90909091 0.97560976 0.97560976
|
|
0.94736842 0.93023256 0.95 0.97560976]
|
|
|
|
mean value: 0.964026534262227
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 0.95454545 1. 0.86956522 1. 1.
|
|
1. 0.86956522 0.95 0.95238095]
|
|
|
|
mean value: 0.9596056841709015
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 0.95238095 0.95238095 0.95238095
|
|
0.9 1. 0.95 1. ]
|
|
|
|
mean value: 0.9707142857142856
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [1. 0.97619048 1. 0.90119048 0.97619048 0.97619048
|
|
0.95 0.92857143 0.95119048 0.97619048]
|
|
|
|
mean value: 0.9635714285714285
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [1. 0.95454545 1. 0.83333333 0.95238095 0.95238095
|
|
0.9 0.86956522 0.9047619 0.95238095]
|
|
|
|
mean value: 0.9319348767174854
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.91
|
|
|
|
Accuracy on Blind test: 0.96
|
|
|
|
Model_name: LDA
|
|
Model func: LinearDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LinearDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.05552244 0.11581016 0.08573031 0.08327365 0.06015062 0.04256988
|
|
0.04870296 0.08871055 0.0668242 0.07970572]
|
|
|
|
mean value: 0.07270004749298095
|
|
|
|
key: score_time
|
|
value: [0.02346492 0.02336931 0.0217948 0.02209783 0.01211452 0.01214552
|
|
0.02227259 0.02448988 0.02263021 0.01856112]
|
|
|
|
mean value: 0.02029407024383545
|
|
|
|
key: test_mcc
|
|
value: [1. 0.90889326 0.8547619 0.90238095 0.90692382 0.80907152
|
|
0.86240942 0.65952381 0.95227002 0.7565654 ]
|
|
|
|
mean value: 0.861280009770891
|
|
|
|
key: train_mcc
|
|
value: [0.98379816 0.99460913 0.98921825 0.98921825 0.98921825 1.
|
|
0.97849275 0.98384144 0.98921825 0.99462366]
|
|
|
|
mean value: 0.9892238129029853
|
|
|
|
key: test_accuracy
|
|
value: [1. 0.95238095 0.92682927 0.95121951 0.95121951 0.90243902
|
|
0.92682927 0.82926829 0.97560976 0.87804878]
|
|
|
|
mean value: 0.9293844367015098
|
|
|
|
key: train_accuracy
|
|
value: [0.99189189 0.9972973 0.99460916 0.99460916 0.99460916 1.
|
|
0.98921833 0.99191375 0.99460916 0.99730458]
|
|
|
|
mean value: 0.994606250455307
|
|
|
|
key: test_fscore
|
|
value: [1. 0.95 0.92682927 0.95238095 0.95 0.9
|
|
0.91891892 0.82926829 0.97435897 0.87179487]
|
|
|
|
mean value: 0.9273551278429327
|
|
|
|
key: train_fscore
|
|
value: [0.99191375 0.99728997 0.99459459 0.99459459 0.99459459 1.
|
|
0.98930481 0.9919571 0.99462366 0.99730458]
|
|
|
|
mean value: 0.9946177658830327
|
|
|
|
key: test_precision
|
|
value: [1. 1. 0.95 0.95238095 1. 0.94736842
|
|
1. 0.80952381 1. 0.89473684]
|
|
|
|
mean value: 0.9554010025062657
|
|
|
|
key: train_precision
|
|
value: [0.98924731 1. 0.99459459 0.99459459 0.99459459 1.
|
|
0.98404255 0.98930481 0.99462366 1. ]
|
|
|
|
mean value: 0.9941002117551433
|
|
|
|
key: test_recall
|
|
value: [1. 0.9047619 0.9047619 0.95238095 0.9047619 0.85714286
|
|
0.85 0.85 0.95 0.85 ]
|
|
|
|
mean value: 0.9023809523809524
|
|
|
|
key: train_recall
|
|
value: [0.99459459 0.99459459 0.99459459 0.99459459 0.99459459 1.
|
|
0.99462366 0.99462366 0.99462366 0.99462366]
|
|
|
|
mean value: 0.9951467596628887
|
|
|
|
key: test_roc_auc
|
|
value: [1. 0.95238095 0.92738095 0.95119048 0.95238095 0.90357143
|
|
0.925 0.8297619 0.975 0.87738095]
|
|
|
|
mean value: 0.9294047619047618
|
|
|
|
key: train_roc_auc
|
|
value: [0.99189189 0.9972973 0.99460913 0.99460913 0.99460913 1.
|
|
0.98920372 0.99190642 0.99460913 0.99731183]
|
|
|
|
mean value: 0.9946047660563789
|
|
|
|
key: test_jcc
|
|
value: [1. 0.9047619 0.86363636 0.90909091 0.9047619 0.81818182
|
|
0.85 0.70833333 0.95 0.77272727]
|
|
|
|
mean value: 0.8681493506493506
|
|
|
|
key: train_jcc
|
|
value: [0.98395722 0.99459459 0.98924731 0.98924731 0.98924731 1.
|
|
0.97883598 0.98404255 0.98930481 0.99462366]
|
|
|
|
mean value: 0.9893100750105474
|
|
|
|
MCC on Blind test: 0.78
|
|
|
|
Accuracy on Blind test: 0.9
|
|
|
|
Model_name: Multinomial
|
|
Model func: MultinomialNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MultinomialNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01387548 0.01265335 0.01496863 0.0118649 0.01619577 0.01203036
|
|
0.01009297 0.01275992 0.01414347 0.01146722]
|
|
|
|
mean value: 0.013005208969116212
|
|
|
|
key: score_time
|
|
value: [0.01421428 0.0107317 0.01549006 0.00962782 0.01458716 0.01043081
|
|
0.00931191 0.01516914 0.01056194 0.01001906]
|
|
|
|
mean value: 0.012014389038085938
|
|
|
|
key: test_mcc
|
|
value: [0.66742381 0.66742381 0.86240942 0.57570364 0.56527676 0.60952381
|
|
0.8047619 0.71121921 0.8047619 0.90238095]
|
|
|
|
mean value: 0.7170885216941957
|
|
|
|
key: train_mcc
|
|
value: [0.73041298 0.74267016 0.74754216 0.7583352 0.77584118 0.78734807
|
|
0.73148107 0.75792591 0.75307912 0.75412101]
|
|
|
|
mean value: 0.7538756859843339
|
|
|
|
key: test_accuracy
|
|
value: [0.83333333 0.83333333 0.92682927 0.7804878 0.7804878 0.80487805
|
|
0.90243902 0.85365854 0.90243902 0.95121951]
|
|
|
|
mean value: 0.8569105691056911
|
|
|
|
key: train_accuracy
|
|
value: [0.86486486 0.87027027 0.87331536 0.8787062 0.88679245 0.89218329
|
|
0.86522911 0.8787062 0.87601078 0.87601078]
|
|
|
|
mean value: 0.8762089313032709
|
|
|
|
key: test_fscore
|
|
value: [0.8372093 0.82926829 0.93333333 0.80851064 0.8 0.80952381
|
|
0.9 0.85714286 0.9 0.95 ]
|
|
|
|
mean value: 0.8624988233306381
|
|
|
|
key: train_fscore
|
|
value: [0.86772487 0.875 0.87598945 0.88126649 0.890625 0.89637306
|
|
0.86910995 0.88126649 0.87958115 0.88082902]
|
|
|
|
mean value: 0.87977654671808
|
|
|
|
key: test_precision
|
|
value: [0.81818182 0.85 0.875 0.73076923 0.75 0.80952381
|
|
0.9 0.81818182 0.9 0.95 ]
|
|
|
|
mean value: 0.8401656676656677
|
|
|
|
key: train_precision
|
|
value: [0.84974093 0.84422111 0.8556701 0.86082474 0.85929648 0.86069652
|
|
0.84693878 0.86528497 0.85714286 0.85 ]
|
|
|
|
mean value: 0.8549816490102271
|
|
|
|
key: test_recall
|
|
value: [0.85714286 0.80952381 1. 0.9047619 0.85714286 0.80952381
|
|
0.9 0.9 0.9 0.95 ]
|
|
|
|
mean value: 0.8888095238095238
|
|
|
|
key: train_recall
|
|
value: [0.88648649 0.90810811 0.8972973 0.9027027 0.92432432 0.93513514
|
|
0.89247312 0.89784946 0.90322581 0.91397849]
|
|
|
|
mean value: 0.9061580935774485
|
|
|
|
key: test_roc_auc
|
|
value: [0.83333333 0.83333333 0.925 0.77738095 0.77857143 0.8047619
|
|
0.90238095 0.8547619 0.90238095 0.95119048]
|
|
|
|
mean value: 0.8563095238095237
|
|
|
|
key: train_roc_auc
|
|
value: [0.86486486 0.87027027 0.87337983 0.87877071 0.88689334 0.89229875
|
|
0.86515548 0.87865446 0.87593723 0.87590817]
|
|
|
|
mean value: 0.8762133100842778
|
|
|
|
key: test_jcc
|
|
value: [0.72 0.70833333 0.875 0.67857143 0.66666667 0.68
|
|
0.81818182 0.75 0.81818182 0.9047619 ]
|
|
|
|
mean value: 0.761969696969697
|
|
|
|
key: train_jcc
|
|
value: [0.76635514 0.77777778 0.77934272 0.78773585 0.8028169 0.81220657
|
|
0.76851852 0.78773585 0.78504673 0.78703704]
|
|
|
|
mean value: 0.7854573097788518
|
|
|
|
MCC on Blind test: 0.62
|
|
|
|
Accuracy on Blind test: 0.83
|
|
|
|
Model_name: Passive Aggresive
|
|
Model func: PassiveAggressiveClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
PassiveAggressiveClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02087641 0.01695037 0.02051234 0.02458811 0.01624537 0.02017117
|
|
0.0228157 0.018888 0.01997352 0.02516794]
|
|
|
|
mean value: 0.020618891716003417
|
|
|
|
key: score_time
|
|
value: [0.01020479 0.01029229 0.01238561 0.01190782 0.01196027 0.01182055
|
|
0.01182747 0.01213884 0.01230955 0.0123229 ]
|
|
|
|
mean value: 0.011717009544372558
|
|
|
|
key: test_mcc
|
|
value: [0.82462113 0.90889326 0.90238095 0.90649828 0.77831178 0.7633652
|
|
0.90649828 0.72229808 0.90649828 0.85441771]
|
|
|
|
mean value: 0.8473782943864396
|
|
|
|
key: train_mcc
|
|
value: [0.96285167 0.94733093 0.96788166 0.98395676 0.77640338 0.93222307
|
|
0.97849275 0.87315103 0.92718645 0.98927544]
|
|
|
|
mean value: 0.9338753151800493
|
|
|
|
key: test_accuracy
|
|
value: [0.9047619 0.95238095 0.95121951 0.95121951 0.87804878 0.87804878
|
|
0.95121951 0.85365854 0.95121951 0.92682927]
|
|
|
|
mean value: 0.9198606271777003
|
|
|
|
key: train_accuracy
|
|
value: [0.98108108 0.97297297 0.98382749 0.99191375 0.87601078 0.96495957
|
|
0.98921833 0.93261456 0.96226415 0.99460916]
|
|
|
|
mean value: 0.9649471843811467
|
|
|
|
key: test_fscore
|
|
value: [0.91304348 0.95454545 0.95238095 0.95454545 0.89361702 0.88888889
|
|
0.94736842 0.86363636 0.94736842 0.92307692]
|
|
|
|
mean value: 0.9238471378716766
|
|
|
|
key: train_fscore
|
|
value: [0.98143236 0.97368421 0.98395722 0.9919571 0.88942308 0.96605744
|
|
0.98930481 0.93702771 0.96089385 0.99465241]
|
|
|
|
mean value: 0.9668390195062844
|
|
|
|
key: test_precision
|
|
value: [0.84 0.91304348 0.95238095 0.91304348 0.80769231 0.83333333
|
|
1. 0.79166667 1. 0.94736842]
|
|
|
|
mean value: 0.899852863764763
|
|
|
|
key: train_precision
|
|
value: [0.96354167 0.94871795 0.97354497 0.98404255 0.8008658 0.93434343
|
|
0.98404255 0.88151659 1. 0.9893617 ]
|
|
|
|
mean value: 0.9459977220327187
|
|
|
|
key: test_recall
|
|
value: [1. 1. 0.95238095 1. 1. 0.95238095
|
|
0.9 0.95 0.9 0.9 ]
|
|
|
|
mean value: 0.9554761904761905
|
|
|
|
key: train_recall
|
|
value: [1. 1. 0.99459459 1. 1. 1.
|
|
0.99462366 1. 0.92473118 1. ]
|
|
|
|
mean value: 0.9913949433304272
|
|
|
|
key: test_roc_auc
|
|
value: [0.9047619 0.95238095 0.95119048 0.95 0.875 0.87619048
|
|
0.95 0.85595238 0.95 0.92619048]
|
|
|
|
mean value: 0.9191666666666667
|
|
|
|
key: train_roc_auc
|
|
value: [0.98108108 0.97297297 0.98385644 0.99193548 0.87634409 0.96505376
|
|
0.98920372 0.93243243 0.96236559 0.99459459]
|
|
|
|
mean value: 0.9649840162743388
|
|
|
|
key: test_jcc
|
|
value: [0.84 0.91304348 0.90909091 0.91304348 0.80769231 0.8
|
|
0.9 0.76 0.9 0.85714286]
|
|
|
|
mean value: 0.8600013030447813
|
|
|
|
key: train_jcc
|
|
value: [0.96354167 0.94871795 0.96842105 0.98404255 0.8008658 0.93434343
|
|
0.97883598 0.88151659 0.92473118 0.9893617 ]
|
|
|
|
mean value: 0.9374377907853981
|
|
|
|
MCC on Blind test: 0.75
|
|
|
|
Accuracy on Blind test: 0.88
|
|
|
|
Model_name: Stochastic GDescent
|
|
Model func: SGDClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SGDClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01795006 0.0186522 0.01706672 0.01866984 0.01695228 0.01767445
|
|
0.01763296 0.01684928 0.01647019 0.01648283]
|
|
|
|
mean value: 0.017440080642700195
|
|
|
|
key: score_time
|
|
value: [0.01651978 0.01239181 0.01181412 0.01196337 0.01207924 0.01195359
|
|
0.01196837 0.01217914 0.01193547 0.01189327]
|
|
|
|
mean value: 0.012469816207885741
|
|
|
|
key: test_mcc
|
|
value: [0.8660254 0.5956834 0.86333169 0.86240942 0.95227002 0.75714286
|
|
0.95227002 0.38363297 0.8547619 0.7197263 ]
|
|
|
|
mean value: 0.7807253975409502
|
|
|
|
key: train_mcc
|
|
value: [0.97837838 0.57110846 0.9048433 0.97866529 0.93728335 0.98384191
|
|
0.95737027 0.77195645 0.92138789 0.93618785]
|
|
|
|
mean value: 0.8941023146571723
|
|
|
|
key: test_accuracy
|
|
value: [0.92857143 0.76190476 0.92682927 0.92682927 0.97560976 0.87804878
|
|
0.97560976 0.68292683 0.92682927 0.85365854]
|
|
|
|
mean value: 0.8836817653890825
|
|
|
|
key: train_accuracy
|
|
value: [0.98918919 0.74594595 0.95148248 0.98921833 0.96765499 0.99191375
|
|
0.97843666 0.87331536 0.95956873 0.96765499]
|
|
|
|
mean value: 0.9414380418154003
|
|
|
|
key: test_fscore
|
|
value: [0.93333333 0.80769231 0.92307692 0.93333333 0.97674419 0.87804878
|
|
0.97435897 0.60606061 0.92682927 0.83333333]
|
|
|
|
mean value: 0.879281104601581
|
|
|
|
key: train_fscore
|
|
value: [0.98918919 0.79741379 0.94972067 0.98930481 0.96858639 0.99191375
|
|
0.97883598 0.85538462 0.96103896 0.96703297]
|
|
|
|
mean value: 0.9448421121875729
|
|
|
|
key: test_precision
|
|
value: [0.875 0.67741935 1. 0.875 0.95454545 0.9
|
|
1. 0.76923077 0.9047619 0.9375 ]
|
|
|
|
mean value: 0.8893457483376839
|
|
|
|
key: train_precision
|
|
value: [0.98918919 0.66308244 0.98265896 0.97883598 0.93908629 0.98924731
|
|
0.96354167 1. 0.92964824 0.98876404]
|
|
|
|
mean value: 0.9424054123899444
|
|
|
|
key: test_recall
|
|
value: [1. 1. 0.85714286 1. 1. 0.85714286
|
|
0.95 0.5 0.95 0.75 ]
|
|
|
|
mean value: 0.8864285714285715
|
|
|
|
key: train_recall
|
|
value: [0.98918919 1. 0.91891892 1. 1. 0.99459459
|
|
0.99462366 0.74731183 0.99462366 0.94623656]
|
|
|
|
mean value: 0.9585498401627434
|
|
|
|
key: test_roc_auc
|
|
value: [0.92857143 0.76190476 0.92857143 0.925 0.975 0.87857143
|
|
0.975 0.67857143 0.92738095 0.85119048]
|
|
|
|
mean value: 0.8829761904761905
|
|
|
|
key: train_roc_auc
|
|
value: [0.98918919 0.74594595 0.95139494 0.98924731 0.96774194 0.99192095
|
|
0.97839291 0.87365591 0.95947399 0.96771287]
|
|
|
|
mean value: 0.941467596628887
|
|
|
|
key: test_jcc
|
|
value: [0.875 0.67741935 0.85714286 0.875 0.95454545 0.7826087
|
|
0.95 0.43478261 0.86363636 0.71428571]
|
|
|
|
mean value: 0.7984421048796926
|
|
|
|
key: train_jcc
|
|
value: [0.97860963 0.66308244 0.90425532 0.97883598 0.93908629 0.98395722
|
|
0.95854922 0.74731183 0.925 0.93617021]
|
|
|
|
mean value: 0.9014858138117805
|
|
|
|
MCC on Blind test: 0.67
|
|
|
|
Accuracy on Blind test: 0.83
|
|
|
|
Model_name: AdaBoost Classifier
|
|
Model func: AdaBoostClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', AdaBoostClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.20878315 0.1760838 0.18187737 0.17748427 0.20082045 0.19634724
|
|
0.19840193 0.17302775 0.16932535 0.17095423]
|
|
|
|
mean value: 0.18531055450439454
|
|
|
|
key: score_time
|
|
value: [0.01604414 0.01630116 0.01627135 0.01687264 0.01652217 0.02443409
|
|
0.014992 0.01500082 0.01513028 0.01563883]
|
|
|
|
mean value: 0.01672074794769287
|
|
|
|
key: test_mcc
|
|
value: [1. 0.95346259 1. 0.7633652 0.95238095 0.90238095
|
|
1. 0.70714286 1. 0.95238095]
|
|
|
|
mean value: 0.9231113500473697
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [1. 0.97619048 1. 0.87804878 0.97560976 0.95121951
|
|
1. 0.85365854 1. 0.97560976]
|
|
|
|
mean value: 0.9610336817653891
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [1. 0.97674419 1. 0.88888889 0.97560976 0.95238095
|
|
1. 0.85 1. 0.97560976]
|
|
|
|
mean value: 0.9619233539511475
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 0.95454545 1. 0.83333333 1. 0.95238095
|
|
1. 0.85 1. 0.95238095]
|
|
|
|
mean value: 0.9542640692640693
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 0.95238095 0.95238095 0.95238095
|
|
1. 0.85 1. 1. ]
|
|
|
|
mean value: 0.9707142857142856
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [1. 0.97619048 1. 0.87619048 0.97619048 0.95119048
|
|
1. 0.85357143 1. 0.97619048]
|
|
|
|
mean value: 0.960952380952381
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [1. 0.95454545 1. 0.8 0.95238095 0.90909091
|
|
1. 0.73913043 1. 0.95238095]
|
|
|
|
mean value: 0.9307528703180877
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.87
|
|
|
|
Accuracy on Blind test: 0.94
|
|
|
|
Model_name: Bagging Classifier
|
|
Model func: BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
BaggingClassifier(n_jobs=10, oob_score=True,
|
|
random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.06194448 0.06513834 0.06491184 0.06082225 0.07302451 0.05870032
|
|
0.05307627 0.06909204 0.07765031 0.0584054 ]
|
|
|
|
mean value: 0.06427657604217529
|
|
|
|
key: score_time
|
|
value: [0.01991296 0.02684236 0.03728247 0.02274609 0.03553915 0.02886772
|
|
0.03747249 0.0306232 0.03088164 0.0238471 ]
|
|
|
|
mean value: 0.029401516914367674
|
|
|
|
key: test_mcc
|
|
value: [1. 0.9047619 0.95238095 0.81975606 0.90692382 0.8547619
|
|
0.90649828 0.86333169 0.90238095 0.95238095]
|
|
|
|
mean value: 0.9063176525865053
|
|
|
|
key: train_mcc
|
|
value: [0.98918919 0.989247 0.98921825 0.9946235 0.99462366 0.98921825
|
|
1. 0.9946235 0.99462366 0.99462366]
|
|
|
|
mean value: 0.9929990657442348
|
|
|
|
key: test_accuracy
|
|
value: [1. 0.95238095 0.97560976 0.90243902 0.95121951 0.92682927
|
|
0.95121951 0.92682927 0.95121951 0.97560976]
|
|
|
|
mean value: 0.951335656213705
|
|
|
|
key: train_accuracy
|
|
value: [0.99459459 0.99459459 0.99460916 0.99730458 0.99730458 0.99460916
|
|
1. 0.99730458 0.99730458 0.99730458]
|
|
|
|
mean value: 0.9964930429081372
|
|
|
|
key: test_fscore
|
|
value: [1. 0.95238095 0.97560976 0.91304348 0.95 0.92682927
|
|
0.94736842 0.93023256 0.95 0.97560976]
|
|
|
|
mean value: 0.9521074190321793
|
|
|
|
key: train_fscore
|
|
value: [0.99459459 0.99462366 0.99459459 0.99728997 0.99730458 0.99459459
|
|
1. 0.99731903 0.99730458 0.99730458]
|
|
|
|
mean value: 0.9964930194080766
|
|
|
|
key: test_precision
|
|
value: [1. 0.95238095 1. 0.84 1. 0.95
|
|
1. 0.86956522 0.95 0.95238095]
|
|
|
|
mean value: 0.9514327122153209
|
|
|
|
key: train_precision
|
|
value: [0.99459459 0.98930481 0.99459459 1. 0.99462366 0.99459459
|
|
1. 0.99465241 1. 1. ]
|
|
|
|
mean value: 0.9962364658949099
|
|
|
|
key: test_recall
|
|
value: [1. 0.95238095 0.95238095 1. 0.9047619 0.9047619
|
|
0.9 1. 0.95 1. ]
|
|
|
|
mean value: 0.9564285714285714
|
|
|
|
key: train_recall
|
|
value: [0.99459459 1. 0.99459459 0.99459459 1. 0.99459459
|
|
1. 1. 0.99462366 0.99462366]
|
|
|
|
mean value: 0.9967625690206335
|
|
|
|
key: test_roc_auc
|
|
value: [1. 0.95238095 0.97619048 0.9 0.95238095 0.92738095
|
|
0.95 0.92857143 0.95119048 0.97619048]
|
|
|
|
mean value: 0.9514285714285714
|
|
|
|
key: train_roc_auc
|
|
value: [0.99459459 0.99459459 0.99460913 0.9972973 0.99731183 0.99460913
|
|
1. 0.9972973 0.99731183 0.99731183]
|
|
|
|
mean value: 0.9964937518163325
|
|
|
|
key: test_jcc
|
|
value: [1. 0.90909091 0.95238095 0.84 0.9047619 0.86363636
|
|
0.9 0.86956522 0.9047619 0.95238095]
|
|
|
|
mean value: 0.9096578204404291
|
|
|
|
key: train_jcc
|
|
value: [0.98924731 0.98930481 0.98924731 0.99459459 0.99462366 0.98924731
|
|
1. 0.99465241 0.99462366 0.99462366]
|
|
|
|
mean value: 0.9930164717071738
|
|
|
|
MCC on Blind test: 0.91
|
|
|
|
Accuracy on Blind test: 0.96
|
|
|
|
Model_name: Gaussian Process
|
|
Model func: GaussianProcessClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianProcessClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.18906832 0.13127685 0.1263175 0.1358242 0.13411975 0.15867543
|
|
0.19670749 0.14597034 0.14964938 0.18066359]
|
|
|
|
mean value: 0.15482728481292723
|
|
|
|
key: score_time
|
|
value: [0.02278852 0.02293873 0.02779603 0.02318168 0.02501178 0.0236268
|
|
0.02434683 0.0294075 0.02881098 0.0234859 ]
|
|
|
|
mean value: 0.025139474868774415
|
|
|
|
key: test_mcc
|
|
value: [0.66742381 0.66742381 0.56190476 0.60952381 0.85441771 0.56190476
|
|
0.66432098 0.56086079 0.70714286 0.8047619 ]
|
|
|
|
mean value: 0.6659685192278054
|
|
|
|
key: train_mcc
|
|
value: [0.98379816 0.99460913 0.98395537 0.98395537 0.98921825 1.
|
|
0.9946235 0.978494 0.98927606 0.98384191]
|
|
|
|
mean value: 0.9881771737535285
|
|
|
|
key: test_accuracy
|
|
value: [0.83333333 0.83333333 0.7804878 0.80487805 0.92682927 0.7804878
|
|
0.82926829 0.7804878 0.85365854 0.90243902]
|
|
|
|
mean value: 0.832520325203252
|
|
|
|
key: train_accuracy
|
|
value: [0.99189189 0.9972973 0.99191375 0.99191375 0.99460916 1.
|
|
0.99730458 0.98921833 0.99460916 0.99191375]
|
|
|
|
mean value: 0.9940671668973555
|
|
|
|
key: test_fscore
|
|
value: [0.8372093 0.82926829 0.7804878 0.80952381 0.93023256 0.7804878
|
|
0.81081081 0.76923077 0.85 0.9 ]
|
|
|
|
mean value: 0.829725115246953
|
|
|
|
key: train_fscore
|
|
value: [0.99186992 0.99728997 0.99182561 0.99182561 0.99459459 1.
|
|
0.99731903 0.98918919 0.99459459 0.99191375]
|
|
|
|
mean value: 0.9940422277618608
|
|
|
|
key: test_precision
|
|
value: [0.81818182 0.85 0.8 0.80952381 0.90909091 0.8
|
|
0.88235294 0.78947368 0.85 0.9 ]
|
|
|
|
mean value: 0.8408623162183534
|
|
|
|
key: train_precision
|
|
value: [0.99456522 1. 1. 1. 0.99459459 1.
|
|
0.99465241 0.99456522 1. 0.99459459]
|
|
|
|
mean value: 0.997297203038891
|
|
|
|
key: test_recall
|
|
value: [0.85714286 0.80952381 0.76190476 0.80952381 0.95238095 0.76190476
|
|
0.75 0.75 0.85 0.9 ]
|
|
|
|
mean value: 0.8202380952380952
|
|
|
|
key: train_recall
|
|
value: [0.98918919 0.99459459 0.98378378 0.98378378 0.99459459 1.
|
|
1. 0.98387097 0.98924731 0.98924731]
|
|
|
|
mean value: 0.9908311537343796
|
|
|
|
key: test_roc_auc
|
|
value: [0.83333333 0.83333333 0.78095238 0.8047619 0.92619048 0.78095238
|
|
0.82738095 0.7797619 0.85357143 0.90238095]
|
|
|
|
mean value: 0.8322619047619048
|
|
|
|
key: train_roc_auc
|
|
value: [0.99189189 0.9972973 0.99189189 0.99189189 0.99460913 1.
|
|
0.9972973 0.98923278 0.99462366 0.99192095]
|
|
|
|
mean value: 0.9940656785818076
|
|
|
|
key: test_jcc
|
|
value: [0.72 0.70833333 0.64 0.68 0.86956522 0.64
|
|
0.68181818 0.625 0.73913043 0.81818182]
|
|
|
|
mean value: 0.7122028985507246
|
|
|
|
key: train_jcc
|
|
value: [0.98387097 0.99459459 0.98378378 0.98378378 0.98924731 1.
|
|
0.99465241 0.97860963 0.98924731 0.98395722]
|
|
|
|
mean value: 0.988174700489691
|
|
|
|
MCC on Blind test: 0.46
|
|
|
|
Accuracy on Blind test: 0.75
|
|
|
|
Model_name: Gradient Boosting
|
|
Model func: GradientBoostingClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GradientBoostingClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.75021386 0.74320316 0.67933774 0.65103602 0.65639186 0.61880779
|
|
0.64705563 0.64216733 0.64391065 0.64939094]
|
|
|
|
mean value: 0.6681514978408813
|
|
|
|
key: score_time
|
|
value: [0.01284075 0.00922751 0.00918388 0.00923729 0.00981402 0.00916934
|
|
0.00927424 0.00915313 0.0092957 0.00910234]
|
|
|
|
mean value: 0.00962982177734375
|
|
|
|
key: test_mcc
|
|
value: [1. 0.95346259 1. 0.80817439 0.95238095 0.95238095
|
|
0.90649828 0.86333169 0.90238095 0.95238095]
|
|
|
|
mean value: 0.9290990764599092
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [1. 0.97619048 1. 0.90243902 0.97560976 0.97560976
|
|
0.95121951 0.92682927 0.95121951 0.97560976]
|
|
|
|
mean value: 0.963472706155633
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [1. 0.97674419 1. 0.90909091 0.97560976 0.97560976
|
|
0.94736842 0.93023256 0.95 0.97560976]
|
|
|
|
mean value: 0.964026534262227
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 0.95454545 1. 0.86956522 1. 1.
|
|
1. 0.86956522 0.95 0.95238095]
|
|
|
|
mean value: 0.9596056841709015
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 0.95238095 0.95238095 0.95238095
|
|
0.9 1. 0.95 1. ]
|
|
|
|
mean value: 0.9707142857142856
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [1. 0.97619048 1. 0.90119048 0.97619048 0.97619048
|
|
0.95 0.92857143 0.95119048 0.97619048]
|
|
|
|
mean value: 0.9635714285714285
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [1. 0.95454545 1. 0.83333333 0.95238095 0.95238095
|
|
0.9 0.86956522 0.9047619 0.95238095]
|
|
|
|
mean value: 0.9319348767174854
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.91
|
|
|
|
Accuracy on Blind test: 0.96
|
|
|
|
Model_name: QDA
|
|
Model func: QuadraticDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', QuadraticDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.03049493 0.03051066 0.03970337 0.02893615 0.02855396 0.02817822
|
|
0.02765751 0.02851939 0.02899027 0.02894664]
|
|
|
|
mean value: 0.03004910945892334
|
|
|
|
key: score_time
|
|
value: [0.0124836 0.01509047 0.02614284 0.01539373 0.02472258 0.01539445
|
|
0.01547289 0.01532865 0.01533842 0.01527309]
|
|
|
|
mean value: 0.01706407070159912
|
|
|
|
key: test_mcc
|
|
value: [0.76980036 0.76277007 0.57570364 0.58066054 0.62325386 0.61152662
|
|
0.75714286 0.60952381 0.8547619 0.7098505 ]
|
|
|
|
mean value: 0.6854994165877901
|
|
|
|
key: train_mcc
|
|
value: [0.91196665 0.93710863 0.96294605 0.91714558 0.91217304 0.88278505
|
|
1. 0.98395676 0.97866529 0.94236768]
|
|
|
|
mean value: 0.9429114712067205
|
|
|
|
key: test_accuracy
|
|
value: [0.88095238 0.88095238 0.7804878 0.7804878 0.80487805 0.80487805
|
|
0.87804878 0.80487805 0.92682927 0.85365854]
|
|
|
|
mean value: 0.8396051103368176
|
|
|
|
key: train_accuracy
|
|
value: [0.95405405 0.96756757 0.98113208 0.95687332 0.9541779 0.93800539
|
|
1. 0.99191375 0.98921833 0.9703504 ]
|
|
|
|
mean value: 0.9703292780651271
|
|
|
|
key: test_fscore
|
|
value: [0.87179487 0.88372093 0.80851064 0.75675676 0.78947368 0.81818182
|
|
0.87804878 0.8 0.92682927 0.84210526]
|
|
|
|
mean value: 0.8375422011412786
|
|
|
|
key: train_fscore
|
|
value: [0.95184136 0.96648045 0.98071625 0.95480226 0.95184136 0.93371758
|
|
1. 0.99186992 0.98913043 0.96952909]
|
|
|
|
mean value: 0.9689928698409741
|
|
|
|
key: test_precision
|
|
value: [0.94444444 0.86363636 0.73076923 0.875 0.88235294 0.7826087
|
|
0.85714286 0.8 0.9047619 0.88888889]
|
|
|
|
mean value: 0.8529605326472334
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.80952381 0.9047619 0.9047619 0.66666667 0.71428571 0.85714286
|
|
0.9 0.8 0.95 0.8 ]
|
|
|
|
mean value: 0.8307142857142857
|
|
|
|
key: train_recall
|
|
value: [0.90810811 0.93513514 0.96216216 0.91351351 0.90810811 0.87567568
|
|
1. 0.98387097 0.97849462 0.94086022]
|
|
|
|
mean value: 0.9405928509154315
|
|
|
|
key: test_roc_auc
|
|
value: [0.88095238 0.88095238 0.77738095 0.78333333 0.80714286 0.80357143
|
|
0.87857143 0.8047619 0.92738095 0.85238095]
|
|
|
|
mean value: 0.8396428571428571
|
|
|
|
key: train_roc_auc
|
|
value: [0.95405405 0.96756757 0.98108108 0.95675676 0.95405405 0.93783784
|
|
1. 0.99193548 0.98924731 0.97043011]
|
|
|
|
mean value: 0.9702964254577158
|
|
|
|
key: test_jcc
|
|
value: [0.77272727 0.79166667 0.67857143 0.60869565 0.65217391 0.69230769
|
|
0.7826087 0.66666667 0.86363636 0.72727273]
|
|
|
|
mean value: 0.7236327078718383
|
|
|
|
key: train_jcc
|
|
value: [0.90810811 0.93513514 0.96216216 0.91351351 0.90810811 0.87567568
|
|
1. 0.98387097 0.97849462 0.94086022]
|
|
|
|
mean value: 0.9405928509154315
|
|
|
|
MCC on Blind test: 0.21
|
|
|
|
Accuracy on Blind test: 0.66
|
|
|
|
Model_name: Ridge Classifier
|
|
Model func: RidgeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01553535 0.01531935 0.01529002 0.0152123 0.01528549 0.01499939
|
|
0.03626537 0.03717756 0.02597475 0.01628184]
|
|
|
|
mean value: 0.020734143257141114
|
|
|
|
key: score_time
|
|
value: [0.01195645 0.01196551 0.01195502 0.01191282 0.01187205 0.01197076
|
|
0.02354455 0.02118874 0.01196527 0.01193905]
|
|
|
|
mean value: 0.014027023315429687
|
|
|
|
key: test_mcc
|
|
value: [0.81322028 0.95346259 0.95238095 0.90649828 0.95227002 0.85441771
|
|
0.95227002 0.75714286 0.90649828 0.90238095]
|
|
|
|
mean value: 0.8950541932543747
|
|
|
|
key: train_mcc
|
|
value: [0.97310093 0.96779381 0.96788166 0.96239138 0.96261632 0.9734012
|
|
0.96261094 0.97849275 0.96787795 0.96787795]
|
|
|
|
mean value: 0.9684044882348298
|
|
|
|
key: test_accuracy
|
|
value: [0.9047619 0.97619048 0.97560976 0.95121951 0.97560976 0.92682927
|
|
0.97560976 0.87804878 0.95121951 0.95121951]
|
|
|
|
mean value: 0.9466318234610918
|
|
|
|
key: train_accuracy
|
|
value: [0.98648649 0.98378378 0.98382749 0.98113208 0.98113208 0.98652291
|
|
0.98113208 0.98921833 0.98382749 0.98382749]
|
|
|
|
mean value: 0.9840890216361915
|
|
|
|
key: test_fscore
|
|
value: [0.90909091 0.97674419 0.97560976 0.95454545 0.97674419 0.93023256
|
|
0.97435897 0.87804878 0.94736842 0.95 ]
|
|
|
|
mean value: 0.9472743225865894
|
|
|
|
key: train_fscore
|
|
value: /home/tanu/git/LSHTM_analysis/scripts/ml/./katg_7030.py:136: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
smnc_CT.sort_values(by = ['test_mcc'], ascending = False, inplace = True)
|
|
/home/tanu/git/LSHTM_analysis/scripts/ml/./katg_7030.py:139: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
smnc_BT.sort_values(by = ['bts_mcc'], ascending = False, inplace = True)
|
|
[0.98659517 0.98395722 0.98395722 0.98123324 0.98133333 0.98666667
|
|
0.98143236 0.98930481 0.98404255 0.98404255]
|
|
|
|
mean value: 0.9842565136693145
|
|
|
|
key: test_precision
|
|
value: [0.86956522 0.95454545 1. 0.91304348 0.95454545 0.90909091
|
|
1. 0.85714286 1. 0.95 ]
|
|
|
|
mean value: 0.9407933370976849
|
|
|
|
key: train_precision
|
|
value: [0.9787234 0.97354497 0.97354497 0.97340426 0.96842105 0.97368421
|
|
0.96858639 0.98404255 0.97368421 0.97368421]
|
|
|
|
mean value: 0.9741320231500986
|
|
|
|
key: test_recall
|
|
value: [0.95238095 1. 0.95238095 1. 1. 0.95238095
|
|
0.95 0.9 0.9 0.95 ]
|
|
|
|
mean value: 0.9557142857142857
|
|
|
|
key: train_recall
|
|
value: [0.99459459 0.99459459 0.99459459 0.98918919 0.99459459 1.
|
|
0.99462366 0.99462366 0.99462366 0.99462366]
|
|
|
|
mean value: 0.9946062191223481
|
|
|
|
key: test_roc_auc
|
|
value: [0.9047619 0.97619048 0.97619048 0.95 0.975 0.92619048
|
|
0.975 0.87857143 0.95 0.95119048]
|
|
|
|
mean value: 0.9463095238095238
|
|
|
|
key: train_roc_auc
|
|
value: [0.98648649 0.98378378 0.98385644 0.98115373 0.98116827 0.98655914
|
|
0.98109561 0.98920372 0.98379831 0.98379831]
|
|
|
|
mean value: 0.9840903807032839
|
|
|
|
key: test_jcc
|
|
value: [0.83333333 0.95454545 0.95238095 0.91304348 0.95454545 0.86956522
|
|
0.95 0.7826087 0.9 0.9047619 ]
|
|
|
|
mean value: 0.9014784490871447
|
|
|
|
key: train_jcc
|
|
value: [0.97354497 0.96842105 0.96842105 0.96315789 0.96335079 0.97368421
|
|
0.96354167 0.97883598 0.96858639 0.96858639]
|
|
|
|
mean value: 0.9690130389783359
|
|
|
|
MCC on Blind test: 0.81
|
|
|
|
Accuracy on Blind test: 0.92
|
|
|
|
Model_name: Ridge ClassifierCV
|
|
Model func: RidgeClassifierCV(cv=10)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifierCV(cv=10))])
|
|
|
|
key: fit_time
|
|
value: [0.16026402 0.28309011 0.2706182 0.31864095 0.2611227 0.25655651
|
|
0.25506783 0.29842615 0.24984217 0.2597127 ]
|
|
|
|
mean value: 0.26133413314819337
|
|
|
|
key: score_time
|
|
value: [0.02092099 0.02027893 0.02040696 0.02240181 0.02012062 0.02204394
|
|
0.02170444 0.02185845 0.02282166 0.02177119]
|
|
|
|
mean value: 0.021432900428771974
|
|
|
|
key: test_mcc
|
|
value: [0.95346259 0.9047619 0.95238095 0.90238095 0.95238095 0.90238095
|
|
0.95227002 0.60952381 0.95227002 0.80817439]
|
|
|
|
mean value: 0.8889986535315402
|
|
|
|
key: train_mcc
|
|
value: [0.97843556 0.98918919 0.96788166 0.98384191 0.98384191 0.98395676
|
|
0.97849275 0.98921825 0.97849275 0.98384144]
|
|
|
|
mean value: 0.9817192164189735
|
|
|
|
key: test_accuracy
|
|
value: [0.97619048 0.95238095 0.97560976 0.95121951 0.97560976 0.95121951
|
|
0.97560976 0.80487805 0.97560976 0.90243902]
|
|
|
|
mean value: 0.9440766550522648
|
|
|
|
key: train_accuracy
|
|
value: [0.98918919 0.99459459 0.98382749 0.99191375 0.99191375 0.99191375
|
|
0.98921833 0.99460916 0.98921833 0.99191375]
|
|
|
|
mean value: 0.9908312085670576
|
|
|
|
key: test_fscore
|
|
value: [0.97674419 0.95238095 0.97560976 0.95238095 0.97560976 0.95238095
|
|
0.97435897 0.8 0.97435897 0.89473684]
|
|
|
|
mean value: 0.9428561346207702
|
|
|
|
key: train_fscore
|
|
value: [0.98924731 0.99459459 0.98395722 0.99191375 0.99191375 0.9919571
|
|
0.98930481 0.99462366 0.98930481 0.9919571 ]
|
|
|
|
mean value: 0.9908774109633054
|
|
|
|
key: test_precision
|
|
value: [0.95454545 0.95238095 1. 0.95238095 1. 0.95238095
|
|
1. 0.8 1. 0.94444444]
|
|
|
|
mean value: 0.9556132756132756
|
|
|
|
key: train_precision
|
|
value: [0.98395722 0.99459459 0.97354497 0.98924731 0.98924731 0.98404255
|
|
0.98404255 0.99462366 0.98404255 0.98930481]
|
|
|
|
mean value: 0.9866647539369491
|
|
|
|
key: test_recall
|
|
value: [1. 0.95238095 0.95238095 0.95238095 0.95238095 0.95238095
|
|
0.95 0.8 0.95 0.85 ]
|
|
|
|
mean value: 0.9311904761904761
|
|
|
|
key: train_recall
|
|
value: [0.99459459 0.99459459 0.99459459 0.99459459 0.99459459 1.
|
|
0.99462366 0.99462366 0.99462366 0.99462366]
|
|
|
|
mean value: 0.9951467596628887
|
|
|
|
key: test_roc_auc
|
|
value: [0.97619048 0.95238095 0.97619048 0.95119048 0.97619048 0.95119048
|
|
0.975 0.8047619 0.975 0.90119048]
|
|
|
|
mean value: 0.9439285714285715
|
|
|
|
key: train_roc_auc
|
|
value: [0.98918919 0.99459459 0.98385644 0.99192095 0.99192095 0.99193548
|
|
0.98920372 0.99460913 0.98920372 0.99190642]
|
|
|
|
mean value: 0.9908340598663179
|
|
|
|
key: test_jcc
|
|
value: [0.95454545 0.90909091 0.95238095 0.90909091 0.95238095 0.90909091
|
|
0.95 0.66666667 0.95 0.80952381]
|
|
|
|
mean value: 0.8962770562770562
|
|
|
|
key: train_jcc
|
|
value: [0.9787234 0.98924731 0.96842105 0.98395722 0.98395722 0.98404255
|
|
0.97883598 0.98930481 0.97883598 0.98404255]
|
|
|
|
mean value: 0.981936808410669
|
|
|
|
MCC on Blind test: 0.81
|
|
|
|
Accuracy on Blind test: 0.92
|
|
|
|
Model_name: Logistic Regression
|
|
Model func: LogisticRegression(random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegression(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02619624 0.03526211 0.03441119 0.03048706 0.0443604 0.03741527
|
|
0.0345211 0.0420258 0.04124093 0.04728723]
|
|
|
|
mean value: 0.037320733070373535
|
|
|
|
key: score_time
|
|
value: [0.00984049 0.01518893 0.01197791 0.01190042 0.01453376 0.01460385
|
|
0.01199198 0.01444697 0.01451516 0.01206589]
|
|
|
|
mean value: 0.013106536865234376
|
|
|
|
key: test_mcc
|
|
value: [0.85811633 0.90889326 0.90238095 0.80817439 0.90692382 0.8047619
|
|
0.90238095 0.65952381 0.71121921 0.7565654 ]
|
|
|
|
mean value: 0.8218940034262068
|
|
|
|
key: train_mcc
|
|
value: [0.9135669 0.8974153 0.90317988 0.87612986 0.89239625 0.91380162
|
|
0.92993112 0.92452775 0.89264025 0.90846996]
|
|
|
|
mean value: 0.9052058897197073
|
|
|
|
key: test_accuracy
|
|
value: [0.92857143 0.95238095 0.95121951 0.90243902 0.95121951 0.90243902
|
|
0.95121951 0.82926829 0.85365854 0.87804878]
|
|
|
|
mean value: 0.9100464576074332
|
|
|
|
key: train_accuracy
|
|
value: [0.95675676 0.94864865 0.95148248 0.93800539 0.94609164 0.95687332
|
|
0.96495957 0.96226415 0.94609164 0.9541779 ]
|
|
|
|
mean value: 0.952535149704961
|
|
|
|
key: test_fscore
|
|
value: [0.92682927 0.95454545 0.95238095 0.90909091 0.95 0.9047619
|
|
0.95 0.82926829 0.85714286 0.87179487]
|
|
|
|
mean value: 0.9105814510692559
|
|
|
|
key: train_fscore
|
|
value: [0.95698925 0.94906166 0.95187166 0.9383378 0.94652406 0.95698925
|
|
0.96514745 0.96236559 0.94708995 0.95466667]
|
|
|
|
mean value: 0.9529043338593334
|
|
|
|
key: test_precision
|
|
value: [0.95 0.91304348 0.95238095 0.86956522 1. 0.9047619
|
|
0.95 0.80952381 0.81818182 0.89473684]
|
|
|
|
mean value: 0.9062194022605922
|
|
|
|
key: train_precision
|
|
value: [0.95187166 0.94148936 0.94179894 0.93085106 0.93650794 0.95187166
|
|
0.96256684 0.96236559 0.93229167 0.94708995]
|
|
|
|
mean value: 0.9458704669421064
|
|
|
|
key: test_recall
|
|
value: [0.9047619 1. 0.95238095 0.95238095 0.9047619 0.9047619
|
|
0.95 0.85 0.9 0.85 ]
|
|
|
|
mean value: 0.9169047619047619
|
|
|
|
key: train_recall
|
|
value: [0.96216216 0.95675676 0.96216216 0.94594595 0.95675676 0.96216216
|
|
0.96774194 0.96236559 0.96236559 0.96236559]
|
|
|
|
mean value: 0.9600784655623366
|
|
|
|
key: test_roc_auc
|
|
value: [0.92857143 0.95238095 0.95119048 0.90119048 0.95238095 0.90238095
|
|
0.95119048 0.8297619 0.8547619 0.87738095]
|
|
|
|
mean value: 0.9101190476190476
|
|
|
|
key: train_roc_auc
|
|
value: [0.95675676 0.94864865 0.95151119 0.93802674 0.94612031 0.95688753
|
|
0.96495205 0.96226388 0.94604766 0.95415577]
|
|
|
|
mean value: 0.9525370531822145
|
|
|
|
key: test_jcc
|
|
value: [0.86363636 0.91304348 0.90909091 0.83333333 0.9047619 0.82608696
|
|
0.9047619 0.70833333 0.75 0.77272727]
|
|
|
|
mean value: 0.838577545642763
|
|
|
|
key: train_jcc
|
|
value: [0.91752577 0.90306122 0.90816327 0.88383838 0.89847716 0.91752577
|
|
0.93264249 0.92746114 0.89949749 0.91326531]
|
|
|
|
mean value: 0.9101457997889101
|
|
|
|
MCC on Blind test: 0.78
|
|
|
|
Accuracy on Blind test: 0.9
|
|
|
|
Model_name: Logistic RegressionCV
|
|
Model func: LogisticRegressionCV(random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegressionCV(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.94551182 1.15909123 1.07203174 1.16645551 0.98121929 1.43863583
|
|
1.14272809 1.11828685 1.16250324 1.51433372]
|
|
|
|
mean value: 1.170079731941223
|
|
|
|
key: score_time
|
|
value: [0.01489878 0.01527166 0.01541162 0.01551175 0.01527882 0.0123558
|
|
0.01570058 0.01530385 0.01655555 0.01252007]
|
|
|
|
mean value: 0.014880847930908204
|
|
|
|
key: test_mcc
|
|
value: [0.95346259 0.95346259 0.95238095 1. 1. 0.8047619
|
|
0.95227002 0.61969655 0.90649828 0.86240942]
|
|
|
|
mean value: 0.9004942292006795
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.97619048 0.97619048 0.97560976 1. 1. 0.90243902
|
|
0.97560976 0.80487805 0.95121951 0.92682927]
|
|
|
|
mean value: 0.9488966318234611
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.97560976 0.97674419 0.97560976 1. 1. 0.9047619
|
|
0.97435897 0.77777778 0.94736842 0.91891892]
|
|
|
|
mean value: 0.9451149695111841
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 0.95454545 1. 1. 1. 0.9047619
|
|
1. 0.875 1. 1. ]
|
|
|
|
mean value: 0.9734307359307359
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.95238095 1. 0.95238095 1. 1. 0.9047619
|
|
0.95 0.7 0.9 0.85 ]
|
|
|
|
mean value: 0.920952380952381
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.97619048 0.97619048 0.97619048 1. 1. 0.90238095
|
|
0.975 0.80238095 0.95 0.925 ]
|
|
|
|
mean value: 0.9483333333333333
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.95238095 0.95454545 0.95238095 1. 1. 0.82608696
|
|
0.95 0.63636364 0.9 0.85 ]
|
|
|
|
mean value: 0.9021757952192735
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.78
|
|
|
|
Accuracy on Blind test: 0.9
|
|
|
|
Model_name: Gaussian NB
|
|
Model func: GaussianNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01404858 0.01055026 0.01084566 0.00979495 0.01662517 0.01392055
|
|
0.01371741 0.00998712 0.01174212 0.01603532]
|
|
|
|
mean value: 0.012726712226867675
|
|
|
|
key: score_time
|
|
value: [0.01750827 0.00957966 0.00959706 0.00905037 0.01484847 0.01271057
|
|
0.01184988 0.00949311 0.01281643 0.01073074]
|
|
|
|
mean value: 0.011818456649780273
|
|
|
|
key: test_mcc
|
|
value: [0.47673129 0.43656413 0.57570364 0.36718832 0.86240942 0.66432098
|
|
0.38060103 0.37171226 0.58066054 0.6133669 ]
|
|
|
|
mean value: 0.5329258503356614
|
|
|
|
key: train_mcc
|
|
value: [0.60428805 0.56402679 0.57872595 0.56557984 0.58046478 0.60326615
|
|
0.60448508 0.57118216 0.6277874 0.64661665]
|
|
|
|
mean value: 0.5946422853951159
|
|
|
|
key: test_accuracy
|
|
value: [0.73809524 0.71428571 0.7804878 0.68292683 0.92682927 0.82926829
|
|
0.68292683 0.68292683 0.7804878 0.80487805]
|
|
|
|
mean value: 0.7623112659698026
|
|
|
|
key: train_accuracy
|
|
value: [0.8 0.77567568 0.78706199 0.77897574 0.78706199 0.79784367
|
|
0.80053908 0.78167116 0.81132075 0.81940701]
|
|
|
|
mean value: 0.7939557077292926
|
|
|
|
key: test_fscore
|
|
value: [0.74418605 0.73913043 0.80851064 0.71111111 0.93333333 0.84444444
|
|
0.71111111 0.69767442 0.8 0.80952381]
|
|
|
|
mean value: 0.779902534772057
|
|
|
|
key: train_fscore
|
|
value: [0.81122449 0.79706601 0.79898219 0.795 0.80100756 0.81203008
|
|
0.81122449 0.79900744 0.82323232 0.83291771]
|
|
|
|
mean value: 0.808169228755668
|
|
|
|
key: test_precision
|
|
value: [0.72727273 0.68 0.73076923 0.66666667 0.875 0.79166667
|
|
0.64 0.65217391 0.72 0.77272727]
|
|
|
|
mean value: 0.7256276477146042
|
|
|
|
key: train_precision
|
|
value: [0.76811594 0.72767857 0.75480769 0.73953488 0.75 0.75700935
|
|
0.77184466 0.74193548 0.77619048 0.77674419]
|
|
|
|
mean value: 0.7563861241582702
|
|
|
|
key: test_recall
|
|
value: [0.76190476 0.80952381 0.9047619 0.76190476 1. 0.9047619
|
|
0.8 0.75 0.9 0.85 ]
|
|
|
|
mean value: 0.8442857142857143
|
|
|
|
key: train_recall
|
|
value: [0.85945946 0.88108108 0.84864865 0.85945946 0.85945946 0.87567568
|
|
0.85483871 0.8655914 0.87634409 0.89784946]
|
|
|
|
mean value: 0.8678407439697762
|
|
|
|
key: test_roc_auc
|
|
value: [0.73809524 0.71428571 0.77738095 0.68095238 0.925 0.82738095
|
|
0.68571429 0.68452381 0.78333333 0.80595238]
|
|
|
|
mean value: 0.7622619047619048
|
|
|
|
key: train_roc_auc
|
|
value: [0.8 0.77567568 0.78722755 0.7791921 0.78725661 0.79805289
|
|
0.80039233 0.78144435 0.81114502 0.819195 ]
|
|
|
|
mean value: 0.7939581517000872
|
|
|
|
key: test_jcc
|
|
value: [0.59259259 0.5862069 0.67857143 0.55172414 0.875 0.73076923
|
|
0.55172414 0.53571429 0.66666667 0.68 ]
|
|
|
|
mean value: 0.6448969376727998
|
|
|
|
key: train_jcc
|
|
value: [0.68240343 0.66260163 0.66525424 0.65975104 0.66806723 0.6835443
|
|
0.68240343 0.66528926 0.69957082 0.71367521]
|
|
|
|
mean value: 0.6782560583614013
|
|
|
|
MCC on Blind test: 0.47
|
|
|
|
Accuracy on Blind test: 0.76
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.00981188 0.01756334 0.01116681 0.00998116 0.01008105 0.01002264
|
|
0.01777577 0.01780438 0.01079798 0.00976753]
|
|
|
|
mean value: 0.012477254867553711
|
|
|
|
key: score_time
|
|
value: [0.00883985 0.01474833 0.0101068 0.00893354 0.00892401 0.00922704
|
|
0.01536894 0.00982237 0.00946569 0.00879431]
|
|
|
|
mean value: 0.010423088073730468
|
|
|
|
key: test_mcc
|
|
value: [0.52620136 0.4472136 0.66668392 0.7197263 0.80907152 0.41963703
|
|
0.46428571 0.6133669 0.51190476 0.6133669 ]
|
|
|
|
mean value: 0.5791458007819006
|
|
|
|
key: train_mcc
|
|
value: [0.6606283 0.64358181 0.66088006 0.6661434 0.65009172 0.67407311
|
|
0.66454603 0.65225276 0.67687355 0.64866961]
|
|
|
|
mean value: 0.6597740339245326
|
|
|
|
key: test_accuracy
|
|
value: [0.76190476 0.71428571 0.82926829 0.85365854 0.90243902 0.70731707
|
|
0.73170732 0.80487805 0.75609756 0.80487805]
|
|
|
|
mean value: 0.7866434378629501
|
|
|
|
key: train_accuracy
|
|
value: [0.82972973 0.82162162 0.83018868 0.8328841 0.82479784 0.83557951
|
|
0.83018868 0.82479784 0.83827493 0.82210243]
|
|
|
|
mean value: 0.8290165367523858
|
|
|
|
key: test_fscore
|
|
value: [0.77272727 0.75 0.82051282 0.86956522 0.9 0.73913043
|
|
0.73170732 0.80952381 0.75 0.80952381]
|
|
|
|
mean value: 0.7952690681534796
|
|
|
|
key: train_fscore
|
|
value: [0.83464567 0.82446809 0.83289125 0.83510638 0.82758621 0.84237726
|
|
0.83969466 0.83290488 0.84126984 0.83248731]
|
|
|
|
mean value: 0.8343431543661086
|
|
|
|
key: test_precision
|
|
value: [0.73913043 0.66666667 0.88888889 0.8 0.94736842 0.68
|
|
0.71428571 0.77272727 0.75 0.77272727]
|
|
|
|
mean value: 0.7731794671131056
|
|
|
|
key: train_precision
|
|
value: [0.81122449 0.81151832 0.81770833 0.82198953 0.8125 0.80693069
|
|
0.79710145 0.79802956 0.828125 0.78846154]
|
|
|
|
mean value: 0.8093588913988847
|
|
|
|
key: test_recall
|
|
value: [0.80952381 0.85714286 0.76190476 0.95238095 0.85714286 0.80952381
|
|
0.75 0.85 0.75 0.85 ]
|
|
|
|
mean value: 0.8247619047619047
|
|
|
|
key: train_recall
|
|
value: [0.85945946 0.83783784 0.84864865 0.84864865 0.84324324 0.88108108
|
|
0.88709677 0.87096774 0.85483871 0.88172043]
|
|
|
|
mean value: 0.8613542574832898
|
|
|
|
key: test_roc_auc
|
|
value: [0.76190476 0.71428571 0.83095238 0.85119048 0.90357143 0.7047619
|
|
0.73214286 0.80595238 0.75595238 0.80595238]
|
|
|
|
mean value: 0.7866666666666666
|
|
|
|
key: train_roc_auc
|
|
value: [0.82972973 0.82162162 0.8302383 0.83292647 0.82484743 0.83570183
|
|
0.83003487 0.82467306 0.83823017 0.8219413 ]
|
|
|
|
mean value: 0.828994478349317
|
|
|
|
key: test_jcc
|
|
value: [0.62962963 0.6 0.69565217 0.76923077 0.81818182 0.5862069
|
|
0.57692308 0.68 0.6 0.68 ]
|
|
|
|
mean value: 0.6635824364430062
|
|
|
|
key: train_jcc
|
|
value: [0.71621622 0.70135747 0.71363636 0.71689498 0.70588235 0.72767857
|
|
0.72368421 0.71365639 0.7260274 0.71304348]
|
|
|
|
mean value: 0.7158077421167284
|
|
|
|
MCC on Blind test: 0.6
|
|
|
|
Accuracy on Blind test: 0.81
|
|
|
|
Model_name: K-Nearest Neighbors
|
|
Model func: KNeighborsClassifier()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', KNeighborsClassifier())])
|
|
|
|
key: fit_time
|
|
value: [0.00926328 0.0102694 0.01025152 0.01143193 0.01130676 0.01162267
|
|
0.01116467 0.01086497 0.01144218 0.01224303]
|
|
|
|
mean value: 0.010986042022705079
|
|
|
|
key: score_time
|
|
value: [0.01719236 0.01383495 0.01737165 0.0197041 0.02015567 0.01965952
|
|
0.01807666 0.01768923 0.02050591 0.02003956]
|
|
|
|
mean value: 0.018422961235046387
|
|
|
|
key: test_mcc
|
|
value: [0.38138504 0.33485541 0.52420964 0.51551459 0.65871309 0.66668392
|
|
0.53206577 0.36515617 0.27179142 0.2681441 ]
|
|
|
|
mean value: 0.451851915569425
|
|
|
|
key: train_mcc
|
|
value: [0.65140475 0.63994485 0.65225276 0.65953152 0.62870716 0.6516517
|
|
0.65692704 0.65817862 0.60723254 0.62568858]
|
|
|
|
mean value: 0.6431519525144919
|
|
|
|
key: test_accuracy
|
|
value: [0.69047619 0.66666667 0.75609756 0.75609756 0.82926829 0.82926829
|
|
0.75609756 0.68292683 0.63414634 0.63414634]
|
|
|
|
mean value: 0.7235191637630662
|
|
|
|
key: train_accuracy
|
|
value: [0.82432432 0.81891892 0.82479784 0.82749326 0.81132075 0.82479784
|
|
0.82749326 0.82749326 0.8032345 0.81132075]
|
|
|
|
mean value: 0.8201194725723028
|
|
|
|
key: test_fscore
|
|
value: [0.68292683 0.65 0.73684211 0.75 0.8372093 0.82051282
|
|
0.70588235 0.66666667 0.57142857 0.59459459]
|
|
|
|
mean value: 0.7016063243000862
|
|
|
|
key: train_fscore
|
|
value: [0.81586402 0.81126761 0.81586402 0.81609195 0.79651163 0.81690141
|
|
0.82122905 0.81920904 0.79889807 0.80225989]
|
|
|
|
mean value: 0.8114096689798598
|
|
|
|
key: test_precision
|
|
value: [0.7 0.68421053 0.82352941 0.78947368 0.81818182 0.88888889
|
|
0.85714286 0.68421053 0.66666667 0.64705882]
|
|
|
|
mean value: 0.7559363203016454
|
|
|
|
key: train_precision
|
|
value: [0.85714286 0.84705882 0.85714286 0.87116564 0.86163522 0.85294118
|
|
0.85465116 0.86309524 0.81920904 0.8452381 ]
|
|
|
|
mean value: 0.8529280114255333
|
|
|
|
key: test_recall
|
|
value: [0.66666667 0.61904762 0.66666667 0.71428571 0.85714286 0.76190476
|
|
0.6 0.65 0.5 0.55 ]
|
|
|
|
mean value: 0.6585714285714286
|
|
|
|
key: train_recall
|
|
value: [0.77837838 0.77837838 0.77837838 0.76756757 0.74054054 0.78378378
|
|
0.79032258 0.77956989 0.77956989 0.76344086]
|
|
|
|
mean value: 0.7739930252833479
|
|
|
|
key: test_roc_auc
|
|
value: [0.69047619 0.66666667 0.75833333 0.75714286 0.82857143 0.83095238
|
|
0.75238095 0.68214286 0.63095238 0.63214286]
|
|
|
|
mean value: 0.7229761904761904
|
|
|
|
key: train_roc_auc
|
|
value: [0.82432432 0.81891892 0.82467306 0.82733217 0.81113049 0.82468759
|
|
0.82759372 0.82762278 0.80329846 0.81145016]
|
|
|
|
mean value: 0.8201031676838129
|
|
|
|
key: test_jcc
|
|
value: [0.51851852 0.48148148 0.58333333 0.6 0.72 0.69565217
|
|
0.54545455 0.5 0.4 0.42307692]
|
|
|
|
mean value: 0.5467516975777845
|
|
|
|
key: train_jcc
|
|
value: [0.68899522 0.68246445 0.68899522 0.68932039 0.66183575 0.69047619
|
|
0.69668246 0.6937799 0.66513761 0.66981132]
|
|
|
|
mean value: 0.6827498517411101
|
|
|
|
MCC on Blind test: 0.33
|
|
|
|
Accuracy on Blind test: 0.66
|
|
|
|
Model_name: SVM
|
|
Model func: SVC(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SVC(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01881528 0.01888156 0.01720691 0.01661062 0.02828383 0.01732945
|
|
0.01692677 0.01839519 0.01678467 0.01690912]
|
|
|
|
mean value: 0.01861433982849121
|
|
|
|
key: score_time
|
|
value: [0.01185322 0.01096296 0.01071048 0.01054764 0.01315546 0.01148176
|
|
0.0106287 0.01134944 0.01050234 0.01056862]
|
|
|
|
mean value: 0.011176061630249024
|
|
|
|
key: test_mcc
|
|
value: [0.63059263 0.74535599 0.7098505 0.61152662 0.8547619 0.61969655
|
|
0.8213423 0.56190476 0.66668392 0.6133669 ]
|
|
|
|
mean value: 0.6835082074285657
|
|
|
|
key: train_mcc
|
|
value: [0.80625522 0.84682617 0.8025478 0.81479313 0.78510219 0.78137629
|
|
0.81394491 0.83137227 0.8097008 0.8097008 ]
|
|
|
|
mean value: 0.8101619588671509
|
|
|
|
key: test_accuracy
|
|
value: [0.80952381 0.85714286 0.85365854 0.80487805 0.92682927 0.80487805
|
|
0.90243902 0.7804878 0.82926829 0.80487805]
|
|
|
|
mean value: 0.8373983739837398
|
|
|
|
key: train_accuracy
|
|
value: [0.9 0.92162162 0.90026954 0.90566038 0.88948787 0.88679245
|
|
0.90566038 0.91374663 0.90296496 0.90296496]
|
|
|
|
mean value: 0.9029168791432942
|
|
|
|
key: test_fscore
|
|
value: [0.82608696 0.875 0.86363636 0.81818182 0.92682927 0.82608696
|
|
0.90909091 0.7804878 0.8372093 0.80952381]
|
|
|
|
mean value: 0.8472133188972693
|
|
|
|
key: train_fscore
|
|
value: [0.90585242 0.9250646 0.90339426 0.90956072 0.8956743 0.89393939
|
|
0.90956072 0.91794872 0.90769231 0.90769231]
|
|
|
|
mean value: 0.9076379747216281
|
|
|
|
key: test_precision
|
|
value: [0.76 0.77777778 0.82608696 0.7826087 0.95 0.76
|
|
0.83333333 0.76190476 0.7826087 0.77272727]
|
|
|
|
mean value: 0.8007047493569233
|
|
|
|
key: train_precision
|
|
value: [0.85576923 0.88613861 0.87373737 0.87128713 0.84615385 0.83886256
|
|
0.87562189 0.87745098 0.86764706 0.86764706]
|
|
|
|
mean value: 0.8660315741062894
|
|
|
|
key: test_recall
|
|
value: [0.9047619 1. 0.9047619 0.85714286 0.9047619 0.9047619
|
|
1. 0.8 0.9 0.85 ]
|
|
|
|
mean value: 0.9026190476190477
|
|
|
|
key: train_recall
|
|
value: [0.96216216 0.96756757 0.93513514 0.95135135 0.95135135 0.95675676
|
|
0.94623656 0.96236559 0.9516129 0.9516129 ]
|
|
|
|
mean value: 0.9536152281313572
|
|
|
|
key: test_roc_auc
|
|
value: [0.80952381 0.85714286 0.85238095 0.80357143 0.92738095 0.80238095
|
|
0.9047619 0.78095238 0.83095238 0.80595238]
|
|
|
|
mean value: 0.8374999999999999
|
|
|
|
key: train_roc_auc
|
|
value: [0.9 0.92162162 0.90036327 0.9057832 0.88965417 0.88698053
|
|
0.90555071 0.91361523 0.90283348 0.90283348]
|
|
|
|
mean value: 0.9029235687300203
|
|
|
|
key: test_jcc
|
|
value: [0.7037037 0.77777778 0.76 0.69230769 0.86363636 0.7037037
|
|
0.83333333 0.64 0.72 0.68 ]
|
|
|
|
mean value: 0.7374462574462575
|
|
|
|
key: train_jcc
|
|
value: [0.82790698 0.86057692 0.82380952 0.83412322 0.81105991 0.80821918
|
|
0.83412322 0.84834123 0.83098592 0.83098592]
|
|
|
|
mean value: 0.831013201825796
|
|
|
|
MCC on Blind test: 0.72
|
|
|
|
Accuracy on Blind test: 0.88
|
|
|
|
Model_name: MLP
|
|
Model func: MLPClassifier(max_iter=500, random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MLPClassifier(max_iter=500, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.42117834 1.50192738 1.74208856 1.39239717 1.81255579 1.65309405
|
|
2.01920605 2.15930629 2.04894686 1.53648686]
|
|
|
|
mean value: 1.7287187337875367
|
|
|
|
key: score_time
|
|
value: [0.01234365 0.02283001 0.01450133 0.01485205 0.01476645 0.01467419
|
|
0.01476502 0.03691173 0.01803923 0.01702905]
|
|
|
|
mean value: 0.01807126998901367
|
|
|
|
key: test_mcc
|
|
value: [0.85811633 1. 0.75714286 0.8547619 1. 0.75714286
|
|
0.90649828 0.66432098 0.90649828 0.81975606]
|
|
|
|
mean value: 0.852423754766439
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.92857143 1. 0.87804878 0.92682927 1. 0.87804878
|
|
0.95121951 0.82926829 0.95121951 0.90243902]
|
|
|
|
mean value: 0.9245644599303136
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.92682927 1. 0.87804878 0.92682927 1. 0.87804878
|
|
0.94736842 0.81081081 0.94736842 0.88888889]
|
|
|
|
mean value: 0.9204192639365938
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.95 1. 0.9 0.95 1. 0.9
|
|
1. 0.88235294 1. 1. ]
|
|
|
|
mean value: 0.9582352941176471
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.9047619 1. 0.85714286 0.9047619 1. 0.85714286
|
|
0.9 0.75 0.9 0.8 ]
|
|
|
|
mean value: 0.8873809523809524
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.92857143 1. 0.87857143 0.92738095 1. 0.87857143
|
|
0.95 0.82738095 0.95 0.9 ]
|
|
|
|
mean value: 0.924047619047619
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.86363636 1. 0.7826087 0.86363636 1. 0.7826087
|
|
0.9 0.68181818 0.9 0.8 ]
|
|
|
|
mean value: 0.8574308300395257
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.73
|
|
|
|
Accuracy on Blind test: 0.87
|
|
|
|
Model_name: Decision Tree
|
|
Model func: DecisionTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', DecisionTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02964616 0.01735234 0.02308035 0.01720619 0.01867843 0.01552367
|
|
0.02575731 0.01813507 0.01478839 0.01864028]
|
|
|
|
mean value: 0.01988081932067871
|
|
|
|
key: score_time
|
|
value: [0.01226211 0.00949502 0.01432991 0.00976801 0.00871301 0.00870228
|
|
0.01428056 0.00927043 0.0092001 0.00945854]
|
|
|
|
mean value: 0.010547995567321777
|
|
|
|
key: test_mcc
|
|
value: [0.90889326 0.9047619 1. 0.90238095 0.90692382 0.95238095
|
|
0.95227002 0.8047619 0.95227002 0.95227002]
|
|
|
|
mean value: 0.9236912842968128
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.95238095 0.95238095 1. 0.95121951 0.95121951 0.97560976
|
|
0.97560976 0.90243902 0.97560976 0.97560976]
|
|
|
|
mean value: 0.9612078977932637
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.95 0.95238095 1. 0.95238095 0.95 0.97560976
|
|
0.97435897 0.9 0.97435897 0.97435897]
|
|
|
|
mean value: 0.9603448583936389
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 0.95238095 1. 0.95238095 1. 1.
|
|
1. 0.9 1. 1. ]
|
|
|
|
mean value: 0.9804761904761905
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.9047619 0.95238095 1. 0.95238095 0.9047619 0.95238095
|
|
0.95 0.9 0.95 0.95 ]
|
|
|
|
mean value: 0.9416666666666667
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.95238095 0.95238095 1. 0.95119048 0.95238095 0.97619048
|
|
0.975 0.90238095 0.975 0.975 ]
|
|
|
|
mean value: 0.9611904761904762
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.9047619 0.90909091 1. 0.90909091 0.9047619 0.95238095
|
|
0.95 0.81818182 0.95 0.95 ]
|
|
|
|
mean value: 0.9248268398268398
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.82
|
|
|
|
Accuracy on Blind test: 0.92
|
|
|
|
Model_name: Extra Trees
|
|
Model func: ExtraTreesClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreesClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.11855721 0.12169838 0.12727523 0.12549639 0.12631321 0.12417126
|
|
0.13257003 0.12751269 0.13062501 0.1096518 ]
|
|
|
|
mean value: 0.12438712120056153
|
|
|
|
key: score_time
|
|
value: [0.02752948 0.02243638 0.01942682 0.01884174 0.01758575 0.01780415
|
|
0.01768422 0.01769805 0.01859951 0.01751542]
|
|
|
|
mean value: 0.019512152671813963
|
|
|
|
key: test_mcc
|
|
value: [0.90889326 0.8660254 0.90649828 0.90238095 0.95238095 0.75714286
|
|
1. 0.8547619 0.85441771 1. ]
|
|
|
|
mean value: 0.9002501316790256
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.95238095 0.92857143 0.95121951 0.95121951 0.97560976 0.87804878
|
|
1. 0.92682927 0.92682927 1. ]
|
|
|
|
mean value: 0.9490708478513357
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.95454545 0.93333333 0.95454545 0.95238095 0.97560976 0.87804878
|
|
1. 0.92682927 0.92307692 1. ]
|
|
|
|
mean value: 0.9498369922760166
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.91304348 0.875 0.91304348 0.95238095 1. 0.9
|
|
1. 0.9047619 0.94736842 1. ]
|
|
|
|
mean value: 0.9405598234717227
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 0.95238095 0.95238095 0.85714286
|
|
1. 0.95 0.9 1. ]
|
|
|
|
mean value: 0.9611904761904762
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.95238095 0.92857143 0.95 0.95119048 0.97619048 0.87857143
|
|
1. 0.92738095 0.92619048 1. ]
|
|
|
|
mean value: 0.949047619047619
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.91304348 0.875 0.91304348 0.90909091 0.95238095 0.7826087
|
|
1. 0.86363636 0.85714286 1. ]
|
|
|
|
mean value: 0.9065946734424996
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.77
|
|
|
|
Accuracy on Blind test: 0.9
|
|
|
|
Model_name: Extra Tree
|
|
Model func: ExtraTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.00960135 0.00979376 0.00971293 0.00982785 0.00957465 0.00961232
|
|
0.00974131 0.00961852 0.00998235 0.01082802]
|
|
|
|
mean value: 0.009829306602478027
|
|
|
|
key: score_time
|
|
value: [0.00871539 0.00880337 0.00862288 0.00876284 0.00870943 0.00873494
|
|
0.00865674 0.00874162 0.00948977 0.00937748]
|
|
|
|
mean value: 0.008861446380615234
|
|
|
|
key: test_mcc
|
|
value: [0.71754731 0.58834841 0.62325386 0.59982886 0.6806903 0.56190476
|
|
0.65871309 0.7633652 0.81975606 0.65915306]
|
|
|
|
mean value: 0.6672560912173882
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.85714286 0.78571429 0.80487805 0.7804878 0.82926829 0.7804878
|
|
0.82926829 0.87804878 0.90243902 0.80487805]
|
|
|
|
mean value: 0.8252613240418119
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.85 0.75675676 0.78947368 0.74285714 0.81081081 0.7804878
|
|
0.82051282 0.86486486 0.88888889 0.75 ]
|
|
|
|
mean value: 0.805465277377986
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.89473684 0.875 0.88235294 0.92857143 0.9375 0.8
|
|
0.84210526 0.94117647 1. 1. ]
|
|
|
|
mean value: 0.9101442945599292
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.80952381 0.66666667 0.71428571 0.61904762 0.71428571 0.76190476
|
|
0.8 0.8 0.8 0.6 ]
|
|
|
|
mean value: 0.7285714285714285
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.85714286 0.78571429 0.80714286 0.78452381 0.83214286 0.78095238
|
|
0.82857143 0.87619048 0.9 0.8 ]
|
|
|
|
mean value: 0.8252380952380952
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.73913043 0.60869565 0.65217391 0.59090909 0.68181818 0.64
|
|
0.69565217 0.76190476 0.8 0.6 ]
|
|
|
|
mean value: 0.6770284208545078
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.42
|
|
|
|
Accuracy on Blind test: 0.75
|
|
|
|
Model_name: Random Forest
|
|
Model func: RandomForestClassifier(n_estimators=1000, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(n_estimators=1000, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.48892093 1.47670484 1.47442937 1.47451377 1.4959383 1.47596216
|
|
1.51960182 1.68420792 1.62056398 1.53635812]
|
|
|
|
mean value: 1.5247201204299927
|
|
|
|
key: score_time
|
|
value: [0.09144354 0.09082866 0.09075522 0.09119344 0.09084129 0.09078074
|
|
0.09067225 0.10559177 0.09852886 0.0922277 ]
|
|
|
|
mean value: 0.09328634738922119
|
|
|
|
key: test_mcc
|
|
value: [1. 0.95346259 0.90649828 0.95227002 1. 0.90238095
|
|
1. 0.90692382 0.95238095 1. ]
|
|
|
|
mean value: 0.9573916612509499
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [1. 0.97619048 0.95121951 0.97560976 1. 0.95121951
|
|
1. 0.95121951 0.97560976 1. ]
|
|
|
|
mean value: 0.9781068524970964
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [1. 0.97674419 0.95454545 0.97674419 1. 0.95238095
|
|
1. 0.95238095 0.97560976 1. ]
|
|
|
|
mean value: 0.9788405487497943
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 0.95454545 0.91304348 0.95454545 1. 0.95238095
|
|
1. 0.90909091 0.95238095 1. ]
|
|
|
|
mean value: 0.9635987201204592
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 1. 0.95238095
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9952380952380953
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [1. 0.97619048 0.95 0.975 1. 0.95119048
|
|
1. 0.95238095 0.97619048 1. ]
|
|
|
|
mean value: 0.9780952380952381
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [1. 0.95454545 0.91304348 0.95454545 1. 0.90909091
|
|
1. 0.90909091 0.95238095 1. ]
|
|
|
|
mean value: 0.9592697157914549
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.91
|
|
|
|
Accuracy on Blind test: 0.96
|
|
|
|
Model_name: Random Forest2
|
|
Model func: RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000...05', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10,
|
|
oob_score=True, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.9516983 0.93607759 0.93637466 0.98142076 1.0112915 0.97042155
|
|
0.91884851 1.02991462 1.1556201 0.91718888]
|
|
|
|
mean value: 0.980885648727417
|
|
|
|
key: score_time
|
|
value: [0.19083285 0.22782016 0.21047091 0.20850635 0.26981235 0.23706317
|
|
0.16170216 0.16438246 0.1959908 0.26817775]
|
|
|
|
mean value: 0.21347589492797853
|
|
|
|
key: test_mcc
|
|
value: [0.95346259 0.95346259 0.85441771 0.95238095 1. 0.90238095
|
|
0.95238095 0.80907152 1. 0.95238095]
|
|
|
|
mean value: 0.9329938212534088
|
|
|
|
key: train_mcc
|
|
value: [0.96807684 0.96779381 0.97866529 0.96788166 0.96788166 0.9734012
|
|
0.97866283 0.96787795 0.97339739 0.98395537]
|
|
|
|
mean value: 0.9727593997295828
|
|
|
|
key: test_accuracy
|
|
value: [0.97619048 0.97619048 0.92682927 0.97560976 1. 0.95121951
|
|
0.97560976 0.90243902 1. 0.97560976]
|
|
|
|
mean value: 0.9659698025551684
|
|
|
|
key: train_accuracy
|
|
value: [0.98378378 0.98378378 0.98921833 0.98382749 0.98382749 0.98652291
|
|
0.98921833 0.98382749 0.98652291 0.99191375]
|
|
|
|
mean value: 0.9862446273767029
|
|
|
|
key: test_fscore
|
|
value: [0.97674419 0.97674419 0.93023256 0.97560976 1. 0.95238095
|
|
0.97560976 0.9047619 1. 0.97560976]
|
|
|
|
mean value: 0.9667693055668098
|
|
|
|
key: train_fscore
|
|
value: [0.98404255 0.98395722 0.98930481 0.98395722 0.98395722 0.98666667
|
|
0.9893617 0.98404255 0.9867374 0.992 ]
|
|
|
|
mean value: 0.9864027346296044
|
|
|
|
key: test_precision
|
|
value: [0.95454545 0.95454545 0.90909091 1. 1. 0.95238095
|
|
0.95238095 0.86363636 1. 0.95238095]
|
|
|
|
mean value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
0.9538961038961039
|
|
|
|
key: train_precision
|
|
value: [0.96858639 0.97354497 0.97883598 0.97354497 0.97354497 0.97368421
|
|
0.97894737 0.97368421 0.97382199 0.98412698]
|
|
|
|
mean value: 0.9752322050034918
|
|
|
|
key: test_recall
|
|
value: [1. 1. 0.95238095 0.95238095 1. 0.95238095
|
|
1. 0.95 1. 1. ]
|
|
|
|
mean value: 0.9807142857142856
|
|
|
|
key: train_recall
|
|
value: [1. 0.99459459 1. 0.99459459 0.99459459 1.
|
|
1. 0.99462366 1. 1. ]
|
|
|
|
mean value: 0.9978407439697763
|
|
|
|
key: test_roc_auc
|
|
value: [0.97619048 0.97619048 0.92619048 0.97619048 1. 0.95119048
|
|
0.97619048 0.90357143 1. 0.97619048]
|
|
|
|
mean value: 0.9661904761904762
|
|
|
|
key: train_roc_auc
|
|
value: [0.98378378 0.98378378 0.98924731 0.98385644 0.98385644 0.98655914
|
|
0.98918919 0.98379831 0.98648649 0.99189189]
|
|
|
|
mean value: 0.9862452775356001
|
|
|
|
key: test_jcc
|
|
value: [0.95454545 0.95454545 0.86956522 0.95238095 1. 0.90909091
|
|
0.95238095 0.82608696 1. 0.95238095]
|
|
|
|
mean value: 0.9370976849237719
|
|
|
|
key: train_jcc
|
|
value: [0.96858639 0.96842105 0.97883598 0.96842105 0.96842105 0.97368421
|
|
0.97894737 0.96858639 0.97382199 0.98412698]
|
|
|
|
mean value: 0.9731852464202974
|
|
|
|
MCC on Blind test: 0.91
|
|
|
|
Accuracy on Blind test: 0.96
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.0236361 0.00996685 0.00973201 0.00977063 0.01012659 0.00971603
|
|
0.00966501 0.00985694 0.00972986 0.00974369]
|
|
|
|
mean value: 0.011194372177124023
|
|
|
|
key: score_time
|
|
value: [0.01239753 0.00912547 0.00883961 0.00879169 0.00874281 0.00875664
|
|
0.00875926 0.00876093 0.00876856 0.00884151]
|
|
|
|
mean value: 0.009178400039672852
|
|
|
|
key: test_mcc
|
|
value: [0.52620136 0.4472136 0.66668392 0.7197263 0.80907152 0.41963703
|
|
0.46428571 0.6133669 0.51190476 0.6133669 ]
|
|
|
|
mean value: 0.5791458007819006
|
|
|
|
key: train_mcc
|
|
value: [0.6606283 0.64358181 0.66088006 0.6661434 0.65009172 0.67407311
|
|
0.66454603 0.65225276 0.67687355 0.64866961]
|
|
|
|
mean value: 0.6597740339245326
|
|
|
|
key: test_accuracy
|
|
value: [0.76190476 0.71428571 0.82926829 0.85365854 0.90243902 0.70731707
|
|
0.73170732 0.80487805 0.75609756 0.80487805]
|
|
|
|
mean value: 0.7866434378629501
|
|
|
|
key: train_accuracy
|
|
value: [0.82972973 0.82162162 0.83018868 0.8328841 0.82479784 0.83557951
|
|
0.83018868 0.82479784 0.83827493 0.82210243]
|
|
|
|
mean value: 0.8290165367523858
|
|
|
|
key: test_fscore
|
|
value: [0.77272727 0.75 0.82051282 0.86956522 0.9 0.73913043
|
|
0.73170732 0.80952381 0.75 0.80952381]
|
|
|
|
mean value: 0.7952690681534796
|
|
|
|
key: train_fscore
|
|
value: [0.83464567 0.82446809 0.83289125 0.83510638 0.82758621 0.84237726
|
|
0.83969466 0.83290488 0.84126984 0.83248731]
|
|
|
|
mean value: 0.8343431543661086
|
|
|
|
key: test_precision
|
|
value: [0.73913043 0.66666667 0.88888889 0.8 0.94736842 0.68
|
|
0.71428571 0.77272727 0.75 0.77272727]
|
|
|
|
mean value: 0.7731794671131056
|
|
|
|
key: train_precision
|
|
value: [0.81122449 0.81151832 0.81770833 0.82198953 0.8125 0.80693069
|
|
0.79710145 0.79802956 0.828125 0.78846154]
|
|
|
|
mean value: 0.8093588913988847
|
|
|
|
key: test_recall
|
|
value: [0.80952381 0.85714286 0.76190476 0.95238095 0.85714286 0.80952381
|
|
0.75 0.85 0.75 0.85 ]
|
|
|
|
mean value: 0.8247619047619047
|
|
|
|
key: train_recall
|
|
value: [0.85945946 0.83783784 0.84864865 0.84864865 0.84324324 0.88108108
|
|
0.88709677 0.87096774 0.85483871 0.88172043]
|
|
|
|
mean value: 0.8613542574832898
|
|
|
|
key: test_roc_auc
|
|
value: [0.76190476 0.71428571 0.83095238 0.85119048 0.90357143 0.7047619
|
|
0.73214286 0.80595238 0.75595238 0.80595238]
|
|
|
|
mean value: 0.7866666666666666
|
|
|
|
key: train_roc_auc
|
|
value: [0.82972973 0.82162162 0.8302383 0.83292647 0.82484743 0.83570183
|
|
0.83003487 0.82467306 0.83823017 0.8219413 ]
|
|
|
|
mean value: 0.828994478349317
|
|
|
|
key: test_jcc
|
|
value: [0.62962963 0.6 0.69565217 0.76923077 0.81818182 0.5862069
|
|
0.57692308 0.68 0.6 0.68 ]
|
|
|
|
mean value: 0.6635824364430062
|
|
|
|
key: train_jcc
|
|
value: [0.71621622 0.70135747 0.71363636 0.71689498 0.70588235 0.72767857
|
|
0.72368421 0.71365639 0.7260274 0.71304348]
|
|
|
|
mean value: 0.7158077421167284
|
|
|
|
MCC on Blind test: 0.6
|
|
|
|
Accuracy on Blind test: 0.81
|
|
|
|
Model_name: XGBoost
|
|
Model func: XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000...
|
|
interaction_constraints=None, learning_rate=None,
|
|
max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan,
|
|
monotone_constraints=None, n_estimators=100,
|
|
n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None,
|
|
reg_lambda=None, scale_pos_weight=None,
|
|
subsample=None, tree_method=None,
|
|
use_label_encoder=False,
|
|
validate_parameters=None, verbosity=0))])
|
|
|
|
key: fit_time
|
|
value: [0.13729072 0.0542841 0.18945885 0.05321908 0.05603051 0.0593791
|
|
0.06020927 0.05859399 0.06958175 0.06030631]
|
|
|
|
mean value: 0.07983536720275879
|
|
|
|
key: score_time
|
|
value: [0.01096654 0.01153803 0.01101851 0.01092124 0.01061153 0.01044726
|
|
0.01049089 0.01065683 0.01051927 0.01184368]
|
|
|
|
mean value: 0.010901379585266113
|
|
|
|
key: test_mcc
|
|
value: [1. 0.95346259 1. 0.95227002 0.90692382 0.95238095
|
|
0.90649828 0.90692382 0.95227002 1. ]
|
|
|
|
mean value: 0.9530729499296946
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [1. 0.97619048 1. 0.97560976 0.95121951 0.97560976
|
|
0.95121951 0.95121951 0.97560976 1. ]
|
|
|
|
mean value: 0.9756678281068525
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [1. 0.97674419 1. 0.97674419 0.95 0.97560976
|
|
0.94736842 0.95238095 0.97435897 1. ]
|
|
|
|
mean value: 0.9753206475983143
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 0.95454545 1. 0.95454545 1. 1.
|
|
1. 0.90909091 1. 1. ]
|
|
|
|
mean value: 0.9818181818181818
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 0.9047619 0.95238095
|
|
0.9 1. 0.95 1. ]
|
|
|
|
mean value: 0.9707142857142858
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [1. 0.97619048 1. 0.975 0.95238095 0.97619048
|
|
0.95 0.95238095 0.975 1. ]
|
|
|
|
mean value: 0.9757142857142856
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [1. 0.95454545 1. 0.95454545 0.9047619 0.95238095
|
|
0.9 0.90909091 0.95 1. ]
|
|
|
|
mean value: 0.9525324675324676
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.88
|
|
|
|
Accuracy on Blind test: 0.95
|
|
|
|
Model_name: LDA
|
|
Model func: LinearDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LinearDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.03848815 0.0564611 0.04007053 0.07608032 0.04673767 0.09112334
|
|
0.07648802 0.09771609 0.07483935 0.0377171 ]
|
|
|
|
mean value: 0.06357216835021973
|
|
|
|
key: score_time
|
|
value: [0.0230732 0.01212573 0.02329516 0.01226211 0.02332783 0.03573036
|
|
0.02186012 0.03916335 0.01241779 0.02289486]
|
|
|
|
mean value: 0.02261505126953125
|
|
|
|
key: test_mcc
|
|
value: [0.9047619 0.90889326 0.90692382 0.8547619 0.90692382 0.76500781
|
|
0.85441771 0.65871309 0.90649828 0.7633652 ]
|
|
|
|
mean value: 0.8430266801146862
|
|
|
|
key: train_mcc
|
|
value: [0.98379816 0.99460913 0.98921825 0.98921825 0.98921825 0.99462366
|
|
0.98384144 0.99462366 0.98921825 0.98384144]
|
|
|
|
mean value: 0.9892210469904663
|
|
|
|
key: test_accuracy
|
|
value: [0.95238095 0.95238095 0.95121951 0.92682927 0.95121951 0.87804878
|
|
0.92682927 0.82926829 0.95121951 0.87804878]
|
|
|
|
mean value: 0.9197444831591173
|
|
|
|
key: train_accuracy
|
|
value: [0.99189189 0.9972973 0.99460916 0.99460916 0.99460916 0.99730458
|
|
0.99191375 0.99730458 0.99460916 0.99191375]
|
|
|
|
mean value: 0.994606250455307
|
|
|
|
key: test_fscore
|
|
value: [0.95238095 0.95 0.95 0.92682927 0.95 0.87179487
|
|
0.92307692 0.82051282 0.94736842 0.86486486]
|
|
|
|
mean value: 0.9156828121975747
|
|
|
|
key: train_fscore
|
|
value: [0.99191375 0.99728997 0.99459459 0.99459459 0.99459459 0.99730458
|
|
0.9919571 0.99730458 0.99462366 0.9919571 ]
|
|
|
|
mean value: 0.9946134532763986
|
|
|
|
key: test_precision
|
|
value: [0.95238095 1. 1. 0.95 1. 0.94444444
|
|
0.94736842 0.84210526 1. 0.94117647]
|
|
|
|
mean value: 0.9577475551624158
|
|
|
|
key: train_precision
|
|
value: [0.98924731 1. 0.99459459 0.99459459 0.99459459 0.99462366
|
|
0.98930481 1. 0.99462366 0.98930481]
|
|
|
|
mean value: 0.9940888033108147
|
|
|
|
key: test_recall
|
|
value: [0.95238095 0.9047619 0.9047619 0.9047619 0.9047619 0.80952381
|
|
0.9 0.8 0.9 0.8 ]
|
|
|
|
mean value: 0.8780952380952382
|
|
|
|
key: train_recall
|
|
value: [0.99459459 0.99459459 0.99459459 0.99459459 0.99459459 1.
|
|
0.99462366 0.99462366 0.99462366 0.99462366]
|
|
|
|
mean value: 0.9951467596628887
|
|
|
|
key: test_roc_auc
|
|
value: [0.95238095 0.95238095 0.95238095 0.92738095 0.95238095 0.8797619
|
|
0.92619048 0.82857143 0.95 0.87619048]
|
|
|
|
mean value: 0.9197619047619047
|
|
|
|
key: train_roc_auc
|
|
value: [0.99189189 0.9972973 0.99460913 0.99460913 0.99460913 0.99731183
|
|
0.99190642 0.99731183 0.99460913 0.99190642]
|
|
|
|
mean value: 0.9946062191223481
|
|
|
|
key: test_jcc
|
|
value: [0.90909091 0.9047619 0.9047619 0.86363636 0.9047619 0.77272727
|
|
0.85714286 0.69565217 0.9 0.76190476]
|
|
|
|
mean value: 0.8474440052700922
|
|
|
|
key: train_jcc
|
|
value: [0.98395722 0.99459459 0.98924731 0.98924731 0.98924731 0.99462366
|
|
0.98404255 0.99462366 0.98930481 0.98404255]
|
|
|
|
mean value: 0.9892930980374963
|
|
|
|
MCC on Blind test: 0.78
|
|
|
|
Accuracy on Blind test: 0.9
|
|
|
|
Model_name: Multinomial
|
|
Model func: MultinomialNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MultinomialNB())])
|
|
|
|
key: fit_time
|
|
value: [0.03393626 0.01116276 0.01050591 0.01051927 0.01106477 0.01092601
|
|
0.0112021 0.01075196 0.01055264 0.0097971 ]
|
|
|
|
mean value: 0.013041877746582031
|
|
|
|
key: score_time
|
|
value: [0.0211122 0.01032758 0.0091815 0.00963449 0.00966692 0.00959134
|
|
0.00962043 0.0095799 0.00896215 0.00962305]
|
|
|
|
mean value: 0.01072995662689209
|
|
|
|
key: test_mcc
|
|
value: [0.42857143 0.52380952 0.7633652 0.51966679 0.75714286 0.46623254
|
|
0.51190476 0.46300848 0.58066054 0.7565654 ]
|
|
|
|
mean value: 0.5770927520886003
|
|
|
|
key: train_mcc
|
|
value: [0.63036031 0.63110621 0.65040473 0.639995 0.63987066 0.65892307
|
|
0.60747259 0.64549275 0.67228752 0.65953152]
|
|
|
|
mean value: 0.6435444364715565
|
|
|
|
key: test_accuracy
|
|
value: [0.71428571 0.76190476 0.87804878 0.75609756 0.87804878 0.73170732
|
|
0.75609756 0.73170732 0.7804878 0.87804878]
|
|
|
|
mean value: 0.7866434378629501
|
|
|
|
key: train_accuracy
|
|
value: [0.81351351 0.81351351 0.82479784 0.81940701 0.81671159 0.82749326
|
|
0.8032345 0.82210243 0.83557951 0.82749326]
|
|
|
|
mean value: 0.8203846434035114
|
|
|
|
key: test_fscore
|
|
value: [0.71428571 0.76190476 0.88888889 0.7826087 0.87804878 0.75555556
|
|
0.75 0.71794872 0.8 0.87179487]
|
|
|
|
mean value: 0.7921035986518489
|
|
|
|
key: train_fscore
|
|
value: [0.82262211 0.82352941 0.82849604 0.82414698 0.82828283 0.83589744
|
|
0.80939948 0.828125 0.84073107 0.83756345]
|
|
|
|
mean value: 0.8278793807837299
|
|
|
|
key: test_precision
|
|
value: [0.71428571 0.76190476 0.83333333 0.72 0.9 0.70833333
|
|
0.75 0.73684211 0.72 0.89473684]
|
|
|
|
mean value: 0.7739436090225564
|
|
|
|
key: train_precision
|
|
value: [0.78431373 0.7815534 0.80927835 0.80102041 0.77725118 0.79512195
|
|
0.78680203 0.8030303 0.81725888 0.79326923]
|
|
|
|
mean value: 0.794889946578593
|
|
|
|
key: test_recall
|
|
value: [0.71428571 0.76190476 0.95238095 0.85714286 0.85714286 0.80952381
|
|
0.75 0.7 0.9 0.85 ]
|
|
|
|
mean value: 0.8152380952380952
|
|
|
|
key: train_recall
|
|
value: [0.86486486 0.87027027 0.84864865 0.84864865 0.88648649 0.88108108
|
|
0.83333333 0.85483871 0.8655914 0.88709677]
|
|
|
|
mean value: 0.8640860215053764
|
|
|
|
key: test_roc_auc
|
|
value: [0.71428571 0.76190476 0.87619048 0.75357143 0.87857143 0.7297619
|
|
0.75595238 0.73095238 0.78333333 0.87738095]
|
|
|
|
mean value: 0.7861904761904762
|
|
|
|
key: train_roc_auc
|
|
value: [0.81351351 0.81351351 0.82486196 0.81948561 0.81689916 0.82763731
|
|
0.80315315 0.82201395 0.8354984 0.82733217]
|
|
|
|
mean value: 0.8203908747457135
|
|
|
|
key: test_jcc
|
|
value: [0.55555556 0.61538462 0.8 0.64285714 0.7826087 0.60714286
|
|
0.6 0.56 0.66666667 0.77272727]
|
|
|
|
mean value: 0.6602942805986285
|
|
|
|
key: train_jcc
|
|
value: [0.69868996 0.7 0.70720721 0.70089286 0.70689655 0.71806167
|
|
0.67982456 0.70666667 0.72522523 0.72052402]
|
|
|
|
mean value: 0.7063988717177541
|
|
|
|
MCC on Blind test: 0.62
|
|
|
|
Accuracy on Blind test: 0.83
|
|
|
|
Model_name: Passive Aggresive
|
|
Model func: PassiveAggressiveClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
PassiveAggressiveClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.018049 0.02355123 0.02099872 0.02084327 0.02586102 0.02243018
|
|
0.02418661 0.02225685 0.02579045 0.02315402]
|
|
|
|
mean value: 0.022712135314941408
|
|
|
|
key: score_time
|
|
value: [0.00903296 0.01131964 0.01169229 0.01225495 0.01198363 0.0118804
|
|
0.01187754 0.0114584 0.01192617 0.01193905]
|
|
|
|
mean value: 0.011536502838134765
|
|
|
|
key: test_mcc
|
|
value: [0.8660254 1. 0.95238095 0.90238095 1. 0.80817439
|
|
0.95227002 0.72229808 0.86240942 0.81975606]
|
|
|
|
mean value: 0.8885695274349608
|
|
|
|
key: train_mcc
|
|
value: [0.95247913 0.99460913 0.9946235 0.97849275 1. 0.94236768
|
|
0.99462366 0.94234975 0.96816407 0.98921825]
|
|
|
|
mean value: 0.9756927908121855
|
|
|
|
key: test_accuracy
|
|
value: [0.92857143 1. 0.97560976 0.95121951 1. 0.90243902
|
|
0.97560976 0.85365854 0.92682927 0.90243902]
|
|
|
|
mean value: 0.9416376306620209
|
|
|
|
key: train_accuracy
|
|
value: [0.97567568 0.9972973 0.99730458 0.98921833 1. 0.9703504
|
|
0.99730458 0.9703504 0.98382749 0.99460916]
|
|
|
|
mean value: 0.9875937932541706
|
|
|
|
key: test_fscore
|
|
value: [0.93333333 1. 0.97560976 0.95238095 1. 0.90909091
|
|
0.97435897 0.86363636 0.91891892 0.88888889]
|
|
|
|
mean value: 0.9416218096705902
|
|
|
|
key: train_fscore
|
|
value: [0.9762533 0.99728997 0.99728997 0.98913043 1. 0.97112861
|
|
0.99730458 0.97127937 0.98360656 0.99462366]
|
|
|
|
mean value: 0.9877906456528402
|
|
|
|
key: test_precision
|
|
value: [0.875 1. 1. 0.95238095 1. 0.86956522
|
|
1. 0.79166667 1. 1. ]
|
|
|
|
mean value: 0.9488612836438923
|
|
|
|
key: train_precision
|
|
value: [0.95360825 1. 1. 0.99453552 1. 0.94387755
|
|
1. 0.94416244 1. 0.99462366]
|
|
|
|
mean value: 0.9830807410030974
|
|
|
|
key: test_recall
|
|
value: [1. 1. 0.95238095 0.95238095 1. 0.95238095
|
|
0.95 0.95 0.85 0.8 ]
|
|
|
|
mean value: 0.9407142857142857
|
|
|
|
key: train_recall
|
|
value: [1. 0.99459459 0.99459459 0.98378378 1. 1.
|
|
0.99462366 1. 0.96774194 0.99462366]
|
|
|
|
mean value: 0.9929962220284801
|
|
|
|
key: test_roc_auc
|
|
value: [0.92857143 1. 0.97619048 0.95119048 1. 0.90119048
|
|
0.975 0.85595238 0.925 0.9 ]
|
|
|
|
mean value: 0.9413095238095238
|
|
|
|
key: train_roc_auc
|
|
value: [0.97567568 0.9972973 0.9972973 0.98920372 1. 0.97043011
|
|
0.99731183 0.97027027 0.98387097 0.99460913]
|
|
|
|
mean value: 0.9875966288869515
|
|
|
|
key: test_jcc
|
|
value: [0.875 1. 0.95238095 0.90909091 1. 0.83333333
|
|
0.95 0.76 0.85 0.8 ]
|
|
|
|
mean value: 0.8929805194805195
|
|
|
|
key: train_jcc
|
|
value: [0.95360825 0.99459459 0.99459459 0.97849462 1. 0.94387755
|
|
0.99462366 0.94416244 0.96774194 0.98930481]
|
|
|
|
mean value: 0.9761002452068489
|
|
|
|
MCC on Blind test: 0.73
|
|
|
|
Accuracy on Blind test: 0.86
|
|
|
|
Model_name: Stochastic GDescent
|
|
Model func: SGDClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SGDClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01773119 0.01894855 0.0170114 0.01634312 0.01716518 0.01672268
|
|
0.01967621 0.01650691 0.01855946 0.01665854]
|
|
|
|
mean value: 0.01753232479095459
|
|
|
|
key: score_time
|
|
value: [0.01205349 0.01189566 0.0119071 0.01189089 0.01292086 0.01191783
|
|
0.01203084 0.01201177 0.01197481 0.01218939]
|
|
|
|
mean value: 0.012079262733459472
|
|
|
|
key: test_mcc
|
|
value: [0.80952381 0.78446454 0.95227002 0.86333169 0.78072006 0.74124932
|
|
0.8547619 0.76500781 0.95227002 0.73786479]
|
|
|
|
mean value: 0.8241463949364545
|
|
|
|
key: train_mcc
|
|
value: [0.97298719 0.83968394 0.9214168 0.93057445 0.88164335 0.7451756
|
|
0.97339739 0.95737027 0.98395537 0.81247091]
|
|
|
|
mean value: 0.9018675260545876
|
|
|
|
key: test_accuracy
|
|
value: [0.9047619 0.88095238 0.97560976 0.92682927 0.87804878 0.85365854
|
|
0.92682927 0.87804878 0.97560976 0.85365854]
|
|
|
|
mean value: 0.9054006968641115
|
|
|
|
key: train_accuracy
|
|
value: [0.98648649 0.91351351 0.95956873 0.96495957 0.93800539 0.85714286
|
|
0.98652291 0.97843666 0.99191375 0.89757412]
|
|
|
|
mean value: 0.9474123989218328
|
|
|
|
key: test_fscore
|
|
value: [0.9047619 0.89361702 0.97674419 0.92307692 0.86486486 0.83333333
|
|
0.92682927 0.88372093 0.97435897 0.82352941]
|
|
|
|
mean value: 0.9004836818009054
|
|
|
|
key: train_fscore
|
|
value: [0.98644986 0.92039801 0.96083551 0.96418733 0.93409742 0.83280757
|
|
0.9867374 0.97883598 0.992 0.88622754]
|
|
|
|
mean value: 0.9442576627868985
|
|
|
|
key: test_precision
|
|
value: [0.9047619 0.80769231 0.95454545 1. 1. 1.
|
|
0.9047619 0.82608696 1. 1. ]
|
|
|
|
mean value: 0.9397848528283311
|
|
|
|
key: train_precision
|
|
value: [0.98913043 0.85253456 0.92929293 0.98314607 0.99390244 1.
|
|
0.97382199 0.96354167 0.98412698 1. ]
|
|
|
|
mean value: 0.9669497073050086
|
|
|
|
key: test_recall
|
|
value: [0.9047619 1. 1. 0.85714286 0.76190476 0.71428571
|
|
0.95 0.95 0.95 0.7 ]
|
|
|
|
mean value: 0.8788095238095238
|
|
|
|
key: train_recall
|
|
value: [0.98378378 1. 0.99459459 0.94594595 0.88108108 0.71351351
|
|
1. 0.99462366 1. 0.79569892]
|
|
|
|
mean value: 0.930924149956408
|
|
|
|
key: test_roc_auc
|
|
value: [0.9047619 0.88095238 0.975 0.92857143 0.88095238 0.85714286
|
|
0.92738095 0.8797619 0.975 0.85 ]
|
|
|
|
mean value: 0.905952380952381
|
|
|
|
key: train_roc_auc
|
|
value: [0.98648649 0.91351351 0.95966289 0.96490846 0.93785237 0.85675676
|
|
0.98648649 0.97839291 0.99189189 0.89784946]
|
|
|
|
mean value: 0.9473801220575414
|
|
|
|
key: test_jcc
|
|
value: [0.82608696 0.80769231 0.95454545 0.85714286 0.76190476 0.71428571
|
|
0.86363636 0.79166667 0.95 0.7 ]
|
|
|
|
mean value: 0.8226961082395865
|
|
|
|
key: train_jcc
|
|
value: [0.97326203 0.85253456 0.92462312 0.93085106 0.87634409 0.71351351
|
|
0.97382199 0.95854922 0.98412698 0.79569892]
|
|
|
|
mean value: 0.8983325494425128
|
|
|
|
MCC on Blind test: 0.81
|
|
|
|
Accuracy on Blind test: 0.92
|
|
|
|
Model_name: AdaBoost Classifier
|
|
Model func: AdaBoostClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', AdaBoostClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.17113066 0.15320778 0.16114116 0.16019368 0.15601373 0.155586
|
|
0.15632606 0.16162801 0.15992212 0.15480018]
|
|
|
|
mean value: 0.1589949369430542
|
|
|
|
key: score_time
|
|
value: [0.01620245 0.0164144 0.01639581 0.01671028 0.01555824 0.01625705
|
|
0.0161562 0.01611733 0.01684666 0.01635766]
|
|
|
|
mean value: 0.016301608085632323
|
|
|
|
key: test_mcc
|
|
value: [1. 0.95346259 1. 0.95227002 0.95238095 0.95238095
|
|
0.95227002 0.8547619 0.95227002 1. ]
|
|
|
|
mean value: 0.9569796444320845
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [1. 0.97619048 1. 0.97560976 0.97560976 0.97560976
|
|
0.97560976 0.92682927 0.97560976 1. ]
|
|
|
|
mean value: 0.9781068524970964
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [1. 0.97674419 1. 0.97674419 0.97560976 0.97560976
|
|
0.97435897 0.92682927 0.97435897 1. ]
|
|
|
|
mean value: 0.9780255101298777
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 0.95454545 1. 0.95454545 1. 1.
|
|
1. 0.9047619 1. 1. ]
|
|
|
|
mean value: 0.9813852813852814
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 0.95238095 0.95238095
|
|
0.95 0.95 0.95 1. ]
|
|
|
|
mean value: 0.9754761904761905
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [1. 0.97619048 1. 0.975 0.97619048 0.97619048
|
|
0.975 0.92738095 0.975 1. ]
|
|
|
|
mean value: 0.9780952380952381
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [1. 0.95454545 1. 0.95454545 0.95238095 0.95238095
|
|
0.95 0.86363636 0.95 1. ]
|
|
|
|
mean value: 0.9577489177489178
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.9
|
|
|
|
Accuracy on Blind test: 0.95
|
|
|
|
Model_name: Bagging Classifier
|
|
Model func: BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
BaggingClassifier(n_jobs=10, oob_score=True,
|
|
random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.06097746 0.05407095 0.05890322 0.05291629 0.05129123 0.04869723
|
|
0.05514884 0.06428218 0.07081532 0.0492568 ]
|
|
|
|
mean value: 0.05663595199584961
|
|
|
|
key: score_time
|
|
value: [0.02573156 0.02581716 0.02943945 0.02417064 0.01993871 0.02304506
|
|
0.02579427 0.03719068 0.029531 0.02628541]
|
|
|
|
mean value: 0.026694393157958983
|
|
|
|
key: test_mcc
|
|
value: [0.90889326 0.9047619 0.95238095 0.95227002 0.90692382 0.95238095
|
|
0.95227002 0.8547619 1. 0.95227002]
|
|
|
|
mean value: 0.9336912842968128
|
|
|
|
key: train_mcc
|
|
value: [1. 0.99460913 0.99462366 0.9946235 0.99462366 0.9946235
|
|
0.99462366 1. 1. 0.99462366]
|
|
|
|
mean value: 0.9962350748975697
|
|
|
|
key: test_accuracy
|
|
value: [0.95238095 0.95238095 0.97560976 0.97560976 0.95121951 0.97560976
|
|
0.97560976 0.92682927 1. 0.97560976]
|
|
|
|
mean value: 0.9660859465737515
|
|
|
|
key: train_accuracy
|
|
value: [1. 0.9972973 0.99730458 0.99730458 0.99730458 0.99730458
|
|
0.99730458 1. 1. 0.99730458]
|
|
|
|
mean value: 0.9981124790558753
|
|
|
|
key: test_fscore
|
|
value: [0.95 0.95238095 0.97560976 0.97674419 0.95 0.97560976
|
|
0.97435897 0.92682927 1. 0.97435897]
|
|
|
|
mean value: 0.9655891867633217
|
|
|
|
key: train_fscore
|
|
value: [1. 0.99728997 0.99730458 0.99728997 0.99730458 0.99728997
|
|
0.99730458 1. 1. 0.99730458]
|
|
|
|
mean value: 0.9981088247540157
|
|
|
|
key: test_precision
|
|
value: [1. 0.95238095 1. 0.95454545 1. 1.
|
|
1. 0.9047619 1. 1. ]
|
|
|
|
mean value: 0.9811688311688311
|
|
|
|
key: train_precision
|
|
value: [1. 1. 0.99462366 1. 0.99462366 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9989247311827957
|
|
|
|
key: test_recall
|
|
value: [0.9047619 0.95238095 0.95238095 1. 0.9047619 0.95238095
|
|
0.95 0.95 1. 0.95 ]
|
|
|
|
mean value: 0.9516666666666667
|
|
|
|
key: train_recall
|
|
value: [1. 0.99459459 1. 0.99459459 1. 0.99459459
|
|
0.99462366 1. 1. 0.99462366]
|
|
|
|
mean value: 0.9973031095611741
|
|
|
|
key: test_roc_auc
|
|
value: [0.95238095 0.95238095 0.97619048 0.975 0.95238095 0.97619048
|
|
0.975 0.92738095 1. 0.975 ]
|
|
|
|
mean value: 0.9661904761904762
|
|
|
|
key: train_roc_auc
|
|
value: [1. 0.9972973 0.99731183 0.9972973 0.99731183 0.9972973
|
|
0.99731183 1. 1. 0.99731183]
|
|
|
|
mean value: 0.9981139203719849
|
|
|
|
key: test_jcc
|
|
value: [0.9047619 0.90909091 0.95238095 0.95454545 0.9047619 0.95238095
|
|
0.95 0.86363636 1. 0.95 ]
|
|
|
|
mean value: 0.9341558441558442
|
|
|
|
key: train_jcc
|
|
value: [1. 0.99459459 0.99462366 0.99459459 0.99462366 0.99459459
|
|
0.99462366 1. 1. 0.99462366]
|
|
|
|
mean value: 0.9962278407439698
|
|
|
|
MCC on Blind test: 0.87
|
|
|
|
Accuracy on Blind test: 0.94
|
|
|
|
Model_name: Gaussian Process
|
|
Model func: GaussianProcessClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianProcessClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.10364151 0.0689404 0.09549975 0.11759543 0.12540889 0.12004828
|
|
0.07170916 0.10064197 0.09738994 0.06381583]
|
|
|
|
mean value: 0.0964691162109375
|
|
|
|
key: score_time
|
|
value: [0.02268863 0.01398158 0.02261162 0.03039312 0.02353644 0.02690196
|
|
0.0140779 0.02318907 0.01394606 0.01409626]
|
|
|
|
mean value: 0.020542263984680176
|
|
|
|
key: test_mcc
|
|
value: [0.71754731 0.80952381 0.66668392 0.71121921 0.8547619 0.85441771
|
|
0.81975606 0.7633652 0.7633652 0.81975606]
|
|
|
|
mean value: 0.7780396377492421
|
|
|
|
key: train_mcc
|
|
value: [0.97843556 0.97310093 0.97317174 0.97317174 0.97317174 0.98395537
|
|
0.98921825 0.978494 0.98921825 0.97317407]
|
|
|
|
mean value: 0.9785111637414805
|
|
|
|
key: test_accuracy
|
|
value: [0.85714286 0.9047619 0.82926829 0.85365854 0.92682927 0.92682927
|
|
0.90243902 0.87804878 0.87804878 0.90243902]
|
|
|
|
mean value: 0.8859465737514518
|
|
|
|
key: train_accuracy
|
|
value: [0.98918919 0.98648649 0.98652291 0.98652291 0.98652291 0.99191375
|
|
0.99460916 0.98921833 0.99460916 0.98652291]
|
|
|
|
mean value: 0.9892117724193196
|
|
|
|
key: test_fscore
|
|
value: [0.85 0.9047619 0.82051282 0.85 0.92682927 0.93023256
|
|
0.88888889 0.86486486 0.86486486 0.88888889]
|
|
|
|
mean value: 0.8789844059214451
|
|
|
|
key: train_fscore
|
|
value: [0.98913043 0.98637602 0.98637602 0.98637602 0.98637602 0.99182561
|
|
0.99462366 0.98918919 0.99462366 0.98644986]
|
|
|
|
mean value: 0.989134650057088
|
|
|
|
key: test_precision
|
|
value: [0.89473684 0.9047619 0.88888889 0.89473684 0.95 0.90909091
|
|
1. 0.94117647 0.94117647 1. ]
|
|
|
|
mean value: 0.93245683281287
|
|
|
|
key: train_precision
|
|
value: [0.99453552 0.99450549 0.99450549 0.99450549 0.99450549 1.
|
|
0.99462366 0.99456522 0.99462366 0.99453552]
|
|
|
|
mean value: 0.9950905545492605
|
|
|
|
key: test_recall
|
|
value: [0.80952381 0.9047619 0.76190476 0.80952381 0.9047619 0.95238095
|
|
0.8 0.8 0.8 0.8 ]
|
|
|
|
mean value: 0.8342857142857143
|
|
|
|
key: train_recall
|
|
value: [0.98378378 0.97837838 0.97837838 0.97837838 0.97837838 0.98378378
|
|
0.99462366 0.98387097 0.99462366 0.97849462]
|
|
|
|
mean value: 0.9832693984306887
|
|
|
|
key: test_roc_auc
|
|
value: [0.85714286 0.9047619 0.83095238 0.8547619 0.92738095 0.92619048
|
|
0.9 0.87619048 0.87619048 0.9 ]
|
|
|
|
mean value: 0.8853571428571428
|
|
|
|
key: train_roc_auc
|
|
value: [0.98918919 0.98648649 0.98650102 0.98650102 0.98650102 0.99189189
|
|
0.99460913 0.98923278 0.99460913 0.98654461]
|
|
|
|
mean value: 0.9892066259808195
|
|
|
|
key: test_jcc
|
|
value: [0.73913043 0.82608696 0.69565217 0.73913043 0.86363636 0.86956522
|
|
0.8 0.76190476 0.76190476 0.8 ]
|
|
|
|
mean value: 0.7857011104837192
|
|
|
|
key: train_jcc
|
|
value: [0.97849462 0.97311828 0.97311828 0.97311828 0.97311828 0.98378378
|
|
0.98930481 0.97860963 0.98930481 0.97326203]
|
|
|
|
mean value: 0.9785232809141727
|
|
|
|
MCC on Blind test: 0.37
|
|
|
|
Accuracy on Blind test: 0.71
|
|
|
|
Model_name: Gradient Boosting
|
|
Model func: GradientBoostingClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GradientBoostingClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.54057002 0.54626584 0.54693365 0.54073143 0.54698849 0.52059269
|
|
0.54536033 0.54451895 0.55396438 0.55362821]
|
|
|
|
mean value: 0.543955397605896
|
|
|
|
key: score_time
|
|
value: [0.00982523 0.00935054 0.00933266 0.00924683 0.00945854 0.0092802
|
|
0.00935721 0.00927353 0.00961375 0.00918651]
|
|
|
|
mean value: 0.009392499923706055
|
|
|
|
key: test_mcc
|
|
value: [1. 0.95346259 1. 0.90238095 0.95238095 0.95238095
|
|
0.95227002 0.90692382 1. 1. ]
|
|
|
|
mean value: 0.9619799285556849
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [1. 0.97619048 1. 0.95121951 0.97560976 0.97560976
|
|
0.97560976 0.95121951 1. 1. ]
|
|
|
|
mean value: 0.9805458768873403
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [1. 0.97674419 1. 0.95238095 0.97560976 0.97560976
|
|
0.97435897 0.95238095 1. 1. ]
|
|
|
|
mean value: 0.9807084577362513
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 0.95454545 1. 0.95238095 1. 1.
|
|
1. 0.90909091 1. 1. ]
|
|
|
|
mean value: 0.9816017316017316
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 0.95238095 0.95238095 0.95238095
|
|
0.95 1. 1. 1. ]
|
|
|
|
mean value: 0.9807142857142856
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [1. 0.97619048 1. 0.95119048 0.97619048 0.97619048
|
|
0.975 0.95238095 1. 1. ]
|
|
|
|
mean value: 0.9807142857142856
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [1. 0.95454545 1. 0.90909091 0.95238095 0.95238095
|
|
0.95 0.90909091 1. 1. ]
|
|
|
|
mean value: 0.9627489177489177
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.86
|
|
|
|
Accuracy on Blind test: 0.94
|
|
|
|
Model_name: QDA
|
|
Model func: QuadraticDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', QuadraticDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.02883434 0.0289309 0.02810097 0.05655909 0.03880763 0.04022145
|
|
0.03861618 0.02902699 0.02924132 0.03623962]
|
|
|
|
mean value: 0.03545784950256348
|
|
|
|
key: score_time
|
|
value: [0.01243997 0.01541114 0.01505589 0.01314878 0.02119303 0.02428484
|
|
0.01450896 0.02024794 0.01529074 0.01999283]
|
|
|
|
mean value: 0.01715741157531738
|
|
|
|
key: test_mcc
|
|
value: [0.68640647 0.74535599 0.63496528 0.7197263 0.7565654 0.73786479
|
|
0.58066054 0.6133669 0.86333169 0.8047619 ]
|
|
|
|
mean value: 0.7143005278179001
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.83333333 0.85714286 0.80487805 0.85365854 0.87804878 0.85365854
|
|
0.7804878 0.80487805 0.92682927 0.90243902]
|
|
|
|
mean value: 0.8495354239256678
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.85106383 0.875 0.83333333 0.86956522 0.88372093 0.875
|
|
0.8 0.80952381 0.93023256 0.9 ]
|
|
|
|
mean value: 0.8627439678407774
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.76923077 0.77777778 0.74074074 0.8 0.86363636 0.77777778
|
|
0.72 0.77272727 0.86956522 0.9 ]
|
|
|
|
mean value: 0.7991455919282007
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.95238095 1. 0.95238095 0.95238095 0.9047619 1.
|
|
0.9 0.85 1. 0.9 ]
|
|
|
|
mean value: 0.9411904761904761
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.83333333 0.85714286 0.80119048 0.85119048 0.87738095 0.85
|
|
0.78333333 0.80595238 0.92857143 0.90238095]
|
|
|
|
mean value: 0.849047619047619
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.74074074 0.77777778 0.71428571 0.76923077 0.79166667 0.77777778
|
|
0.66666667 0.68 0.86956522 0.81818182]
|
|
|
|
mean value: 0.7605893148719236
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.21
|
|
|
|
Accuracy on Blind test: 0.69
|
|
|
|
Model_name: Ridge Classifier
|
|
Model func: RidgeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02349377 0.03713441 0.03637457 0.03698802 0.03131008 0.03687334
|
|
0.03689528 0.03683734 0.03825593 0.03702378]
|
|
|
|
mean value: 0.03511865139007568
|
|
|
|
key: score_time
|
|
value: [0.02268529 0.02422953 0.0209794 0.02236128 0.02284622 0.02414441
|
|
0.02387094 0.02138376 0.02061534 0.02167439]
|
|
|
|
mean value: 0.02247905731201172
|
|
|
|
key: test_mcc
|
|
value: [0.9047619 0.95346259 0.95238095 0.95227002 0.95238095 0.90238095
|
|
0.90238095 0.75714286 0.85441771 0.80817439]
|
|
|
|
mean value: 0.8939753275609796
|
|
|
|
key: train_mcc
|
|
value: [0.97843556 0.96251377 0.97317407 0.94659116 0.97317407 0.97866529
|
|
0.96787795 0.97305937 0.95709306 0.96787795]
|
|
|
|
mean value: 0.9678462240831375
|
|
|
|
key: test_accuracy
|
|
value: [0.95238095 0.97619048 0.97560976 0.97560976 0.97560976 0.95121951
|
|
0.95121951 0.87804878 0.92682927 0.90243902]
|
|
|
|
mean value: 0.9465156794425087
|
|
|
|
key: train_accuracy
|
|
value: [0.98918919 0.98108108 0.98652291 0.97304582 0.98652291 0.98921833
|
|
0.98382749 0.98652291 0.97843666 0.98382749]
|
|
|
|
mean value: 0.9838194798572157
|
|
|
|
key: test_fscore
|
|
value: [0.95238095 0.97674419 0.97560976 0.97674419 0.97560976 0.95238095
|
|
0.95 0.87804878 0.92307692 0.89473684]
|
|
|
|
mean value: 0.9455332334720041
|
|
|
|
key: train_fscore
|
|
value: [0.98924731 0.98133333 0.98659517 0.97340426 0.98659517 0.98930481
|
|
0.98404255 0.98659517 0.9787234 0.98404255]
|
|
|
|
mean value: 0.9839883746741166
|
|
|
|
key: test_precision
|
|
value: [0.95238095 0.95454545 1. 0.95454545 1. 0.95238095
|
|
0.95 0.85714286 0.94736842 0.94444444]
|
|
|
|
mean value: 0.9512808536492747
|
|
|
|
key: train_precision
|
|
value: [0.98395722 0.96842105 0.9787234 0.95811518 0.9787234 0.97883598
|
|
0.97368421 0.98395722 0.96842105 0.97368421]
|
|
|
|
mean value: 0.9746522935411154
|
|
|
|
key: test_recall
|
|
value: [0.95238095 1. 0.95238095 1. 0.95238095 0.95238095
|
|
0.95 0.9 0.9 0.85 ]
|
|
|
|
mean value: 0.940952380952381
|
|
|
|
key: train_recall /home/tanu/git/LSHTM_analysis/scripts/ml/./katg_7030.py:156: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
ros_CT.sort_values(by = ['test_mcc'], ascending = False, inplace = True)
|
|
/home/tanu/git/LSHTM_analysis/scripts/ml/./katg_7030.py:159: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
ros_BT.sort_values(by = ['bts_mcc'], ascending = False, inplace = True)
|
|
|
|
value: [0.99459459 0.99459459 0.99459459 0.98918919 0.99459459 1.
|
|
0.99462366 0.98924731 0.98924731 0.99462366]
|
|
|
|
mean value: 0.9935309503051439
|
|
|
|
key: test_roc_auc
|
|
value: [0.95238095 0.97619048 0.97619048 0.975 0.97619048 0.95119048
|
|
0.95119048 0.87857143 0.92619048 0.90119048]
|
|
|
|
mean value: 0.9464285714285714
|
|
|
|
key: train_roc_auc
|
|
value: [0.98918919 0.98108108 0.98654461 0.97308922 0.98654461 0.98924731
|
|
0.98379831 0.98651555 0.97840744 0.98379831]
|
|
|
|
mean value: 0.9838215634989829
|
|
|
|
key: test_jcc
|
|
value: [0.90909091 0.95454545 0.95238095 0.95454545 0.95238095 0.90909091
|
|
0.9047619 0.7826087 0.85714286 0.80952381]
|
|
|
|
mean value: 0.8986071899115378
|
|
|
|
key: train_jcc
|
|
value: [0.9787234 0.96335079 0.97354497 0.94818653 0.97354497 0.97883598
|
|
0.96858639 0.97354497 0.95833333 0.96858639]
|
|
|
|
mean value: 0.9685237725766385
|
|
|
|
MCC on Blind test: 0.84
|
|
|
|
Accuracy on Blind test: 0.93
|
|
|
|
Model_name: Ridge ClassifierCV
|
|
Model func: RidgeClassifierCV(cv=10)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifierCV(cv=10))])
|
|
|
|
key: fit_time
|
|
value: [0.26893139 0.27967191 0.2897954 0.31818628 0.27074409 0.26609921
|
|
0.26971841 0.26205802 0.26608682 0.26403761]
|
|
|
|
mean value: 0.2755329132080078
|
|
|
|
key: score_time
|
|
value: [0.02258849 0.02014399 0.02077603 0.02329016 0.02250028 0.01875901
|
|
0.01647425 0.02191854 0.02195883 0.02291036]
|
|
|
|
mean value: 0.02113199234008789
|
|
|
|
key: test_mcc
|
|
value: [0.9047619 0.95346259 0.95238095 0.95227002 0.95238095 0.90238095
|
|
0.90238095 0.75714286 0.85441771 0.80817439]
|
|
|
|
mean value: 0.8939753275609796
|
|
|
|
key: train_mcc
|
|
value: [0.97843556 0.96251377 0.97317407 0.94659116 0.97317407 0.97866529
|
|
0.98384144 0.97305937 0.95709306 0.96787795]
|
|
|
|
mean value: 0.9694425730208168
|
|
|
|
key: test_accuracy
|
|
value: [0.95238095 0.97619048 0.97560976 0.97560976 0.97560976 0.95121951
|
|
0.95121951 0.87804878 0.92682927 0.90243902]
|
|
|
|
mean value: 0.9465156794425087
|
|
|
|
key: train_accuracy
|
|
value: [0.98918919 0.98108108 0.98652291 0.97304582 0.98652291 0.98921833
|
|
0.99191375 0.98652291 0.97843666 0.98382749]
|
|
|
|
mean value: 0.9846281051941429
|
|
|
|
key: test_fscore
|
|
value: [0.95238095 0.97674419 0.97560976 0.97674419 0.97560976 0.95238095
|
|
0.95 0.87804878 0.92307692 0.89473684]
|
|
|
|
mean value: 0.9455332334720041
|
|
|
|
key: train_fscore
|
|
value: [0.98924731 0.98133333 0.98659517 0.97340426 0.98659517 0.98930481
|
|
0.9919571 0.98659517 0.9787234 0.98404255]
|
|
|
|
mean value: 0.9847798298107318
|
|
|
|
key: test_precision
|
|
value: [0.95238095 0.95454545 1. 0.95454545 1. 0.95238095
|
|
0.95 0.85714286 0.94736842 0.94444444]
|
|
|
|
mean value: 0.9512808536492747
|
|
|
|
key: train_precision
|
|
value: [0.98395722 0.96842105 0.9787234 0.95811518 0.9787234 0.97883598
|
|
0.98930481 0.98395722 0.96842105 0.97368421]
|
|
|
|
mean value: 0.9762143537719062
|
|
|
|
key: test_recall
|
|
value: [0.95238095 1. 0.95238095 1. 0.95238095 0.95238095
|
|
0.95 0.9 0.9 0.85 ]
|
|
|
|
mean value: 0.940952380952381
|
|
|
|
key: train_recall
|
|
value: [0.99459459 0.99459459 0.99459459 0.98918919 0.99459459 1.
|
|
0.99462366 0.98924731 0.98924731 0.99462366]
|
|
|
|
mean value: 0.9935309503051439
|
|
|
|
key: test_roc_auc
|
|
value: [0.95238095 0.97619048 0.97619048 0.975 0.97619048 0.95119048
|
|
0.95119048 0.87857143 0.92619048 0.90119048]
|
|
|
|
mean value: 0.9464285714285714
|
|
|
|
key: train_roc_auc
|
|
value: [0.98918919 0.98108108 0.98654461 0.97308922 0.98654461 0.98924731
|
|
0.99190642 0.98651555 0.97840744 0.98379831]
|
|
|
|
mean value: 0.9846323743097937
|
|
|
|
key: test_jcc
|
|
value: [0.90909091 0.95454545 0.95238095 0.95454545 0.95238095 0.90909091
|
|
0.9047619 0.7826087 0.85714286 0.80952381]
|
|
|
|
mean value: 0.8986071899115378
|
|
|
|
key: train_jcc
|
|
value: [0.9787234 0.96335079 0.97354497 0.94818653 0.97354497 0.97883598
|
|
0.98404255 0.97354497 0.95833333 0.96858639]
|
|
|
|
mean value: 0.970069389152332
|
|
|
|
MCC on Blind test: 0.84
|
|
|
|
Accuracy on Blind test: 0.93
|
|
|
|
Model_name: Logistic Regression
|
|
Model func: LogisticRegression(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegression(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02979517 0.03099942 0.02958393 0.02999544 0.0327394 0.03862596
|
|
0.02719903 0.02743649 0.02765322 0.02869177]
|
|
|
|
mean value: 0.03027198314666748
|
|
|
|
key: score_time
|
|
value: [0.01283884 0.0125618 0.01267099 0.01301289 0.01287889 0.01229525
|
|
0.01248908 0.01229692 0.01221228 0.01231456]
|
|
|
|
mean value: 0.012557148933410645
|
|
|
|
key: test_mcc
|
|
value: [0.73029674 0.46225016 0.80909091 0.63305416 0.74795759 0.52727273
|
|
0.71562645 0.82572282 0.82275335 0.33028913]
|
|
|
|
mean value: 0.6604314052101561
|
|
|
|
key: train_mcc
|
|
value: [0.87387789 0.84252546 0.86391052 0.87508285 0.87452017 0.8852317
|
|
0.87454765 0.89549293 0.86509383 0.89609412]
|
|
|
|
mean value: 0.874637713340964
|
|
|
|
key: test_accuracy
|
|
value: [0.86363636 0.72727273 0.9047619 0.80952381 0.85714286 0.76190476
|
|
0.85714286 0.9047619 0.9047619 0.66666667]
|
|
|
|
mean value: 0.8257575757575757
|
|
|
|
key: train_accuracy
|
|
value: [0.93684211 0.92105263 0.93193717 0.93717277 0.93717277 0.94240838
|
|
0.93717277 0.94764398 0.93193717 0.94764398]
|
|
|
|
mean value: 0.9370983742077708
|
|
|
|
key: test_fscore
|
|
value: [0.86956522 0.75 0.9 0.81818182 0.86956522 0.76190476
|
|
0.86956522 0.9 0.91666667 0.69565217]
|
|
|
|
mean value: 0.8351101072840204
|
|
|
|
key: train_fscore
|
|
value: [0.9375 0.92227979 0.93264249 0.93877551 0.93814433 0.94358974
|
|
0.9375 0.94791667 0.93333333 0.94845361]
|
|
|
|
mean value: 0.9380135471730902
|
|
|
|
key: test_precision
|
|
value: [0.83333333 0.69230769 0.9 0.75 0.76923077 0.72727273
|
|
0.83333333 1. 0.84615385 0.66666667]
|
|
|
|
mean value: 0.8018298368298369
|
|
|
|
key: train_precision
|
|
value: [0.92783505 0.90816327 0.92783505 0.92 0.92857143 0.92929293
|
|
0.92783505 0.93814433 0.91 0.92929293]
|
|
|
|
mean value: 0.9246970036999492
|
|
|
|
key: test_recall
|
|
value: [0.90909091 0.81818182 0.9 0.9 1. 0.8
|
|
0.90909091 0.81818182 1. 0.72727273]
|
|
|
|
mean value: 0.8781818181818182
|
|
|
|
key: train_recall
|
|
value: [0.94736842 0.93684211 0.9375 0.95833333 0.94791667 0.95833333
|
|
0.94736842 0.95789474 0.95789474 0.96842105]
|
|
|
|
mean value: 0.9517872807017543
|
|
|
|
key: test_roc_auc
|
|
value: [0.86363636 0.72727273 0.90454545 0.81363636 0.86363636 0.76363636
|
|
0.85454545 0.90909091 0.9 0.66363636]
|
|
|
|
mean value: 0.8263636363636364
|
|
|
|
key: train_roc_auc
|
|
value: [0.93684211 0.92105263 0.93190789 0.9370614 0.93711623 0.94232456
|
|
0.93722588 0.94769737 0.93207237 0.94775219]
|
|
|
|
mean value: 0.9371052631578948
|
|
|
|
key: test_jcc
|
|
value: [0.76923077 0.6 0.81818182 0.69230769 0.76923077 0.61538462
|
|
0.76923077 0.81818182 0.84615385 0.53333333]
|
|
|
|
mean value: 0.7231235431235431
|
|
|
|
key: train_jcc
|
|
value: [0.88235294 0.85576923 0.87378641 0.88461538 0.88349515 0.89320388
|
|
0.88235294 0.9009901 0.875 0.90196078]
|
|
|
|
mean value: 0.8833526817954387
|
|
|
|
MCC on Blind test: 0.76
|
|
|
|
Accuracy on Blind test: 0.89
|
|
|
|
Model_name: Logistic RegressionCV
|
|
Model func: LogisticRegressionCV(random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegressionCV(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.72123003 0.84316516 0.7306025 0.74526048 0.81987715 0.71120024
|
|
0.72252727 0.80323362 0.73404384 0.85311699]
|
|
|
|
mean value: 0.768425726890564
|
|
|
|
key: score_time
|
|
value: [0.01541853 0.01553202 0.01258135 0.01577401 0.01313853 0.01267266
|
|
0.01245832 0.01556325 0.01253867 0.01255989]
|
|
|
|
mean value: 0.013823723793029786
|
|
|
|
key: test_mcc
|
|
value: [1. 0.64715023 0.74161985 0.71818182 0.71818182 0.71818182
|
|
0.71818182 0.67419986 0.90829511 0.42727273]
|
|
|
|
mean value: 0.727126504633149
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [1. 0.81818182 0.85714286 0.85714286 0.85714286 0.85714286
|
|
0.85714286 0.80952381 0.95238095 0.71428571]
|
|
|
|
mean value: 0.858008658008658
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [1. 0.83333333 0.82352941 0.85714286 0.85714286 0.85714286
|
|
0.85714286 0.77777778 0.95652174 0.72727273]
|
|
|
|
mean value: 0.8547006417850408
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 0.76923077 1. 0.81818182 0.81818182 0.81818182
|
|
0.9 1. 0.91666667 0.72727273]
|
|
|
|
mean value: 0.8767715617715618
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 0.90909091 0.7 0.9 0.9 0.9
|
|
0.81818182 0.63636364 1. 0.72727273]
|
|
|
|
mean value: 0.8490909090909091
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [1. 0.81818182 0.85 0.85909091 0.85909091 0.85909091
|
|
0.85909091 0.81818182 0.95 0.71363636]
|
|
|
|
mean value: 0.8586363636363636
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [1. 0.71428571 0.7 0.75 0.75 0.75
|
|
0.75 0.63636364 0.91666667 0.57142857]
|
|
|
|
mean value: 0.7538744588744588
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.75
|
|
|
|
Accuracy on Blind test: 0.88
|
|
|
|
Model_name: Gaussian NB
|
|
Model func: GaussianNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01317358 0.011415 0.01040435 0.00997591 0.00988245 0.00988269
|
|
0.01001906 0.00998998 0.00979877 0.00988674]
|
|
|
|
mean value: 0.010442852973937988
|
|
|
|
key: score_time
|
|
value: [0.01445031 0.01000452 0.00979805 0.00970507 0.00962663 0.00961113
|
|
0.00959015 0.00951719 0.00951576 0.00954485]
|
|
|
|
mean value: 0.01013636589050293
|
|
|
|
key: test_mcc
|
|
value: [0.37796447 0.40824829 0.74795759 0.53300179 0.67419986 0.63305416
|
|
0.35527986 0.66332496 0.33709993 0.33709993]
|
|
|
|
mean value: 0.5067230850517317
|
|
|
|
key: train_mcc
|
|
value: [0.50728584 0.59286988 0.53138382 0.59435593 0.53227428 0.4917695
|
|
0.60438034 0.52466542 0.52683546 0.52870448]
|
|
|
|
mean value: 0.5434524940733954
|
|
|
|
key: test_accuracy
|
|
value: [0.68181818 0.68181818 0.85714286 0.71428571 0.80952381 0.80952381
|
|
0.66666667 0.80952381 0.66666667 0.66666667]
|
|
|
|
mean value: 0.7363636363636363
|
|
|
|
key: train_accuracy
|
|
value: [0.73684211 0.78421053 0.7486911 0.78534031 0.7539267 0.73298429
|
|
0.80104712 0.7486911 0.7539267 0.7486911 ]
|
|
|
|
mean value: 0.7594351060898319
|
|
|
|
key: test_fscore
|
|
value: [0.72 0.74074074 0.86956522 0.76923077 0.83333333 0.81818182
|
|
0.63157895 0.84615385 0.72 0.72 ]
|
|
|
|
mean value: 0.7668784672400233
|
|
|
|
key: train_fscore
|
|
value: [0.77678571 0.81105991 0.78761062 0.81278539 0.78733032 0.77130045
|
|
0.80808081 0.78181818 0.78139535 0.78378378]
|
|
|
|
mean value: 0.7901950517409254
|
|
|
|
key: test_precision
|
|
value: [0.64285714 0.625 0.76923077 0.625 0.71428571 0.75
|
|
0.75 0.73333333 0.64285714 0.64285714]
|
|
|
|
mean value: 0.6895421245421246
|
|
|
|
key: train_precision
|
|
value: [0.6744186 0.72131148 0.68461538 0.72357724 0.696 0.67716535
|
|
0.77669903 0.688 0.7 0.68503937]
|
|
|
|
mean value: 0.7026826453984404
|
|
|
|
key: test_recall
|
|
value: [0.81818182 0.90909091 1. 1. 1. 0.9
|
|
0.54545455 1. 0.81818182 0.81818182]
|
|
|
|
mean value: 0.8809090909090909
|
|
|
|
key: train_recall
|
|
value: [0.91578947 0.92631579 0.92708333 0.92708333 0.90625 0.89583333
|
|
0.84210526 0.90526316 0.88421053 0.91578947]
|
|
|
|
mean value: 0.9045723684210526
|
|
|
|
key: test_roc_auc
|
|
value: [0.68181818 0.68181818 0.86363636 0.72727273 0.81818182 0.81363636
|
|
0.67272727 0.8 0.65909091 0.65909091]
|
|
|
|
mean value: 0.7377272727272728
|
|
|
|
key: train_roc_auc
|
|
value: [0.73684211 0.78421053 0.74775219 0.7845943 0.753125 0.73212719
|
|
0.80126096 0.74950658 0.75460526 0.7495614 ]
|
|
|
|
mean value: 0.7593585526315789
|
|
|
|
key: test_jcc
|
|
value: [0.5625 0.58823529 0.76923077 0.625 0.71428571 0.69230769
|
|
0.46153846 0.73333333 0.5625 0.5625 ]
|
|
|
|
mean value: 0.6271431264813618
|
|
|
|
key: train_jcc
|
|
value: [0.6350365 0.68217054 0.64963504 0.68461538 0.64925373 0.62773723
|
|
0.6779661 0.64179104 0.64122137 0.64444444]
|
|
|
|
mean value: 0.6533871382679696
|
|
|
|
MCC on Blind test: 0.42
|
|
|
|
Accuracy on Blind test: 0.74
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.0090909 0.00899744 0.00900245 0.00919318 0.01037669 0.00928235
|
|
0.00912166 0.00934029 0.00910783 0.00934601]
|
|
|
|
mean value: 0.009285879135131837
|
|
|
|
key: score_time
|
|
value: [0.00865531 0.00874138 0.00873184 0.00945687 0.00972223 0.00895834
|
|
0.00879216 0.00884581 0.00873852 0.00886703]
|
|
|
|
mean value: 0.008950948715209961
|
|
|
|
key: test_mcc
|
|
value: [0.63636364 0.45454545 0.61818182 0.33636364 0.63305416 0.71562645
|
|
0.55161872 0.4719399 0.23373675 0.43007562]
|
|
|
|
mean value: 0.5081506148469601
|
|
|
|
key: train_mcc
|
|
value: [0.70920321 0.72063664 0.67566396 0.73823885 0.6859713 0.6859713
|
|
0.69638158 0.69638158 0.62349105 0.7403031 ]
|
|
|
|
mean value: 0.697224255471689
|
|
|
|
key: test_accuracy
|
|
value: [0.81818182 0.72727273 0.80952381 0.66666667 0.80952381 0.85714286
|
|
0.76190476 0.71428571 0.61904762 0.71428571]
|
|
|
|
mean value: 0.7497835497835498
|
|
|
|
key: train_accuracy
|
|
value: [0.85263158 0.85789474 0.83769634 0.86910995 0.84293194 0.84293194
|
|
0.84816754 0.84816754 0.81151832 0.86910995]
|
|
|
|
mean value: 0.8480159823642877
|
|
|
|
key: test_fscore
|
|
value: [0.81818182 0.72727273 0.8 0.66666667 0.81818182 0.84210526
|
|
0.73684211 0.66666667 0.66666667 0.75 ]
|
|
|
|
mean value: 0.7492583732057416
|
|
|
|
key: train_fscore
|
|
value: [0.86 0.86567164 0.84102564 0.87046632 0.84536082 0.84536082
|
|
0.84816754 0.84816754 0.80645161 0.87309645]
|
|
|
|
mean value: 0.850376839168251
|
|
|
|
key: test_precision
|
|
value: [0.81818182 0.72727273 0.8 0.63636364 0.75 0.88888889
|
|
0.875 0.85714286 0.61538462 0.69230769]
|
|
|
|
mean value: 0.7660542235542236
|
|
|
|
key: train_precision
|
|
value: [0.81904762 0.82075472 0.82828283 0.86597938 0.83673469 0.83673469
|
|
0.84375 0.84375 0.82417582 0.84313725]
|
|
|
|
mean value: 0.8362347012587765
|
|
|
|
key: test_recall
|
|
value: [0.81818182 0.72727273 0.8 0.7 0.9 0.8
|
|
0.63636364 0.54545455 0.72727273 0.81818182]
|
|
|
|
mean value: 0.7472727272727273
|
|
|
|
key: train_recall
|
|
value: [0.90526316 0.91578947 0.85416667 0.875 0.85416667 0.85416667
|
|
0.85263158 0.85263158 0.78947368 0.90526316]
|
|
|
|
mean value: 0.8658552631578947
|
|
|
|
key: test_roc_auc
|
|
value: [0.81818182 0.72727273 0.80909091 0.66818182 0.81363636 0.85454545
|
|
0.76818182 0.72272727 0.61363636 0.70909091]
|
|
|
|
mean value: 0.7504545454545455
|
|
|
|
key: train_roc_auc
|
|
value: [0.85263158 0.85789474 0.83760965 0.86907895 0.84287281 0.84287281
|
|
0.84819079 0.84819079 0.81140351 0.86929825]
|
|
|
|
mean value: 0.8480043859649123
|
|
|
|
key: test_jcc
|
|
value: [0.69230769 0.57142857 0.66666667 0.5 0.69230769 0.72727273
|
|
0.58333333 0.5 0.5 0.6 ]
|
|
|
|
mean value: 0.6033316683316683
|
|
|
|
key: train_jcc
|
|
value: [0.75438596 0.76315789 0.72566372 0.7706422 0.73214286 0.73214286
|
|
0.73636364 0.73636364 0.67567568 0.77477477]
|
|
|
|
mean value: 0.7401313215761582
|
|
|
|
MCC on Blind test: 0.55
|
|
|
|
Accuracy on Blind test: 0.78
|
|
|
|
Model_name: K-Nearest Neighbors
|
|
Model func: KNeighborsClassifier()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', KNeighborsClassifier())])
|
|
|
|
key: fit_time
|
|
value: [0.00881052 0.00952911 0.00968528 0.00963211 0.00966859 0.00922441
|
|
0.00979471 0.00978827 0.00979757 0.00972414]
|
|
|
|
mean value: 0.009565472602844238
|
|
|
|
key: score_time
|
|
value: [0.01085305 0.01080704 0.01071835 0.0106647 0.01091552 0.01068163
|
|
0.01075578 0.01077008 0.01102042 0.01079583]
|
|
|
|
mean value: 0.010798239707946777
|
|
|
|
key: test_mcc
|
|
value: [0.18257419 0. 0.90909091 0.52295779 0.63305416 0.23636364
|
|
0.05504819 0.39196475 0.33028913 0.23373675]
|
|
|
|
mean value: 0.34950794979197125
|
|
|
|
key: train_mcc
|
|
value: [0.6344324 0.7264768 0.60269927 0.66509486 0.65445773 0.70692117
|
|
0.64512756 0.69662073 0.65445773 0.6871103 ]
|
|
|
|
mean value: 0.6673398564766175
|
|
|
|
key: test_accuracy
|
|
value: [0.59090909 0.5 0.95238095 0.76190476 0.80952381 0.61904762
|
|
0.52380952 0.66666667 0.66666667 0.61904762]
|
|
|
|
mean value: 0.670995670995671
|
|
|
|
key: train_accuracy
|
|
value: [0.81578947 0.86315789 0.80104712 0.83246073 0.82722513 0.85340314
|
|
0.82198953 0.84816754 0.82722513 0.84293194]
|
|
|
|
mean value: 0.8333397630201157
|
|
|
|
key: test_fscore
|
|
value: [0.57142857 0.42105263 0.95238095 0.73684211 0.81818182 0.6
|
|
0.5 0.58823529 0.69565217 0.66666667]
|
|
|
|
mean value: 0.6550440213530804
|
|
|
|
key: train_fscore
|
|
value: [0.80662983 0.86170213 0.79787234 0.83157895 0.82901554 0.8556701
|
|
0.81521739 0.84491979 0.82539683 0.83695652]
|
|
|
|
mean value: 0.8304959421378466
|
|
|
|
key: test_precision
|
|
value: [0.6 0.5 0.90909091 0.77777778 0.75 0.6
|
|
0.55555556 0.83333333 0.66666667 0.61538462]
|
|
|
|
mean value: 0.6807808857808858
|
|
|
|
key: train_precision
|
|
value: [0.84883721 0.87096774 0.81521739 0.84042553 0.82474227 0.84693878
|
|
0.84269663 0.85869565 0.82978723 0.86516854]
|
|
|
|
mean value: 0.8443476972764284
|
|
|
|
key: test_recall
|
|
value: [0.54545455 0.36363636 1. 0.7 0.9 0.6
|
|
0.45454545 0.45454545 0.72727273 0.72727273]
|
|
|
|
mean value: 0.6472727272727272
|
|
|
|
key: train_recall
|
|
value: [0.76842105 0.85263158 0.78125 0.82291667 0.83333333 0.86458333
|
|
0.78947368 0.83157895 0.82105263 0.81052632]
|
|
|
|
mean value: 0.8175767543859649
|
|
|
|
key: test_roc_auc
|
|
value: [0.59090909 0.5 0.95454545 0.75909091 0.81363636 0.61818182
|
|
0.52727273 0.67727273 0.66363636 0.61363636]
|
|
|
|
mean value: 0.6718181818181818
|
|
|
|
key: train_roc_auc
|
|
value: [0.81578947 0.86315789 0.80115132 0.83251096 0.82719298 0.8533443
|
|
0.82182018 0.84808114 0.82719298 0.84276316]
|
|
|
|
mean value: 0.8333004385964912
|
|
|
|
key: test_jcc
|
|
value: [0.4 0.26666667 0.90909091 0.58333333 0.69230769 0.42857143
|
|
0.33333333 0.41666667 0.53333333 0.5 ]
|
|
|
|
mean value: 0.5063303363303363
|
|
|
|
key: train_jcc
|
|
value: [0.67592593 0.75700935 0.66371681 0.71171171 0.7079646 0.74774775
|
|
0.68807339 0.73148148 0.7027027 0.71962617]
|
|
|
|
mean value: 0.7105959894012878
|
|
|
|
MCC on Blind test: 0.4
|
|
|
|
Accuracy on Blind test: 0.68
|
|
|
|
Model_name: SVM
|
|
Model func: SVC(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SVC(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01154423 0.01183224 0.01161528 0.01154399 0.01124811 0.01126695
|
|
0.01114488 0.01137328 0.01132202 0.01127124]
|
|
|
|
mean value: 0.011416220664978027
|
|
|
|
key: score_time
|
|
value: [0.00947404 0.00990534 0.00957632 0.00936294 0.00929713 0.00936723
|
|
0.0095048 0.00931263 0.00929332 0.00933337]
|
|
|
|
mean value: 0.009442710876464843
|
|
|
|
key: test_mcc
|
|
value: [0.64715023 0.37796447 0.82572282 0.39196475 0.67419986 0.44038551
|
|
0.62641448 0.90909091 0.82275335 0.23373675]
|
|
|
|
mean value: 0.5949383133354442
|
|
|
|
key: train_mcc
|
|
value: [0.79818857 0.79172691 0.77786752 0.76212373 0.79905587 0.82557489
|
|
0.8047179 0.83546371 0.81125858 0.80327722]
|
|
|
|
mean value: 0.8009254890182891
|
|
|
|
key: test_accuracy
|
|
value: [0.81818182 0.68181818 0.9047619 0.66666667 0.80952381 0.71428571
|
|
0.80952381 0.95238095 0.9047619 0.61904762]
|
|
|
|
mean value: 0.7880952380952381
|
|
|
|
key: train_accuracy
|
|
value: [0.89473684 0.88947368 0.88481675 0.87434555 0.89528796 0.91099476
|
|
0.90052356 0.91623037 0.90052356 0.90052356]
|
|
|
|
mean value: 0.8967456599614219
|
|
|
|
key: test_fscore
|
|
value: [0.83333333 0.72 0.90909091 0.72 0.83333333 0.72727273
|
|
0.83333333 0.95238095 0.91666667 0.66666667]
|
|
|
|
mean value: 0.8112077922077922
|
|
|
|
key: train_fscore
|
|
value: [0.90196078 0.89855072 0.89320388 0.88571429 0.90291262 0.91542289
|
|
0.90452261 0.91919192 0.90731707 0.9035533 ]
|
|
|
|
mean value: 0.9032350090012564
|
|
|
|
key: test_precision
|
|
value: [0.76923077 0.64285714 0.83333333 0.6 0.71428571 0.66666667
|
|
0.76923077 1. 0.84615385 0.61538462]
|
|
|
|
mean value: 0.7457142857142858
|
|
|
|
key: train_precision
|
|
value: [0.8440367 0.83035714 0.83636364 0.81578947 0.84545455 0.87619048
|
|
0.86538462 0.88349515 0.84545455 0.87254902]
|
|
|
|
mean value: 0.8515075297875789
|
|
|
|
key: test_recall
|
|
value: [0.90909091 0.81818182 1. 0.9 1. 0.8
|
|
0.90909091 0.90909091 1. 0.72727273]
|
|
|
|
mean value: 0.8972727272727272
|
|
|
|
key: train_recall
|
|
value: [0.96842105 0.97894737 0.95833333 0.96875 0.96875 0.95833333
|
|
0.94736842 0.95789474 0.97894737 0.93684211]
|
|
|
|
mean value: 0.9622587719298246
|
|
|
|
key: test_roc_auc
|
|
value: [0.81818182 0.68181818 0.90909091 0.67727273 0.81818182 0.71818182
|
|
0.80454545 0.95454545 0.9 0.61363636]
|
|
|
|
mean value: 0.7895454545454546
|
|
|
|
key: train_roc_auc
|
|
value: [0.89473684 0.88947368 0.88442982 0.87384868 0.89490132 0.91074561
|
|
0.90076754 0.91644737 0.90093202 0.90071272]
|
|
|
|
mean value: 0.8966995614035087
|
|
|
|
key: test_jcc
|
|
value: [0.71428571 0.5625 0.83333333 0.5625 0.71428571 0.57142857
|
|
0.71428571 0.90909091 0.84615385 0.5 ]
|
|
|
|
mean value: 0.6927863802863803
|
|
|
|
key: train_jcc
|
|
value: [0.82142857 0.81578947 0.80701754 0.79487179 0.82300885 0.8440367
|
|
0.82568807 0.85046729 0.83035714 0.82407407]
|
|
|
|
mean value: 0.8236739510694793
|
|
|
|
MCC on Blind test: 0.66
|
|
|
|
Accuracy on Blind test: 0.85
|
|
|
|
Model_name: MLP
|
|
Model func: MLPClassifier(max_iter=500, random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MLPClassifier(max_iter=500, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.84304023 0.7079587 0.69341254 0.83001733 0.68674278 0.71935916
|
|
0.85144925 0.70927286 0.68605232 0.81782484]
|
|
|
|
mean value: 0.7545130014419555
|
|
|
|
key: score_time
|
|
value: [0.01477885 0.0163033 0.01992774 0.01476359 0.01474428 0.01230025
|
|
0.01460433 0.01469612 0.01476789 0.0146749 ]
|
|
|
|
mean value: 0.015156126022338868
|
|
|
|
key: test_mcc
|
|
value: [0.63636364 0.36514837 0.71562645 0.55161872 0.63305416 0.44038551
|
|
0.35527986 0.55161872 0.80909091 0.52295779]
|
|
|
|
mean value: 0.5581144128303206
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.81818182 0.68181818 0.85714286 0.76190476 0.80952381 0.71428571
|
|
0.66666667 0.76190476 0.9047619 0.76190476]
|
|
|
|
mean value: 0.7738095238095238
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.81818182 0.69565217 0.84210526 0.7826087 0.81818182 0.72727273
|
|
0.63157895 0.73684211 0.90909091 0.7826087 ]
|
|
|
|
mean value: 0.7744123153734137
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.81818182 0.66666667 0.88888889 0.69230769 0.75 0.66666667
|
|
0.75 0.875 0.90909091 0.75 ]
|
|
|
|
mean value: 0.7766802641802641
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.81818182 0.72727273 0.8 0.9 0.9 0.8
|
|
0.54545455 0.63636364 0.90909091 0.81818182]
|
|
|
|
mean value: 0.7854545454545455
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.81818182 0.68181818 0.85454545 0.76818182 0.81363636 0.71818182
|
|
0.67272727 0.76818182 0.90454545 0.75909091]
|
|
|
|
mean value: 0.7759090909090909
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.69230769 0.53333333 0.72727273 0.64285714 0.69230769 0.57142857
|
|
0.46153846 0.58333333 0.83333333 0.64285714]
|
|
|
|
mean value: 0.638056943056943
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.66
|
|
|
|
Accuracy on Blind test: 0.85
|
|
|
|
Model_name: Decision Tree
|
|
Model func: DecisionTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', DecisionTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01818252 0.01459813 0.01283693 0.01229072 0.01137853 0.01279497
|
|
0.01171303 0.01191187 0.01186252 0.01267433]
|
|
|
|
mean value: 0.013024353981018066
|
|
|
|
key: score_time
|
|
value: [0.0117681 0.00920272 0.00893354 0.00869656 0.00866723 0.00870919
|
|
0.00860786 0.0086484 0.00879455 0.00891566]
|
|
|
|
mean value: 0.00909438133239746
|
|
|
|
key: test_mcc
|
|
value: [1. 0.73029674 0.80909091 0.90829511 0.90909091 0.80909091
|
|
0.90829511 0.90909091 0.71818182 0.43007562]
|
|
|
|
mean value: 0.8131508025748061
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [1. 0.86363636 0.9047619 0.95238095 0.95238095 0.9047619
|
|
0.95238095 0.95238095 0.85714286 0.71428571]
|
|
|
|
mean value: 0.9054112554112554
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [1. 0.86956522 0.9 0.94736842 0.95238095 0.9
|
|
0.95652174 0.95238095 0.85714286 0.75 ]
|
|
|
|
mean value: 0.9085360139479133
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 0.83333333 0.9 1. 0.90909091 0.9
|
|
0.91666667 1. 0.9 0.69230769]
|
|
|
|
mean value: 0.9051398601398601
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 0.90909091 0.9 0.9 1. 0.9
|
|
1. 0.90909091 0.81818182 0.81818182]
|
|
|
|
mean value: 0.9154545454545455
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [1. 0.86363636 0.90454545 0.95 0.95454545 0.90454545
|
|
0.95 0.95454545 0.85909091 0.70909091]
|
|
|
|
mean value: 0.905
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [1. 0.76923077 0.81818182 0.9 0.90909091 0.81818182
|
|
0.91666667 0.90909091 0.75 0.6 ]
|
|
|
|
mean value: 0.8390442890442891
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.85
|
|
|
|
Accuracy on Blind test: 0.93
|
|
|
|
Model_name: Extra Trees
|
|
Model func: ExtraTreesClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreesClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.09295607 0.09225941 0.091506 0.09173632 0.0931685 0.09369612
|
|
0.09518909 0.09360838 0.09597731 0.09335709]
|
|
|
|
mean value: 0.09334542751312255
|
|
|
|
key: score_time
|
|
value: [0.01729226 0.01723695 0.01731944 0.01766443 0.01851225 0.01764464
|
|
0.01801538 0.01791477 0.01864171 0.01731777]
|
|
|
|
mean value: 0.017755961418151854
|
|
|
|
key: test_mcc
|
|
value: [0.63636364 0.63636364 1. 0.52727273 0.74795759 0.52727273
|
|
0.4719399 1. 0.71562645 0.33028913]
|
|
|
|
mean value: 0.6593085799873805
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.81818182 0.81818182 1. 0.76190476 0.85714286 0.76190476
|
|
0.71428571 1. 0.85714286 0.66666667]
|
|
|
|
mean value: 0.8255411255411256
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.81818182 0.81818182 1. 0.76190476 0.86956522 0.76190476
|
|
0.66666667 1. 0.86956522 0.69565217]
|
|
|
|
mean value: 0.826162243553548
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.81818182 0.81818182 1. 0.72727273 0.76923077 0.72727273
|
|
0.85714286 1. 0.83333333 0.66666667]
|
|
|
|
mean value: 0.8217282717282718
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.81818182 0.81818182 1. 0.8 1. 0.8
|
|
0.54545455 1. 0.90909091 0.72727273]
|
|
|
|
mean value: 0.8418181818181818
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.81818182 0.81818182 1. 0.76363636 0.86363636 0.76363636
|
|
0.72272727 1. 0.85454545 0.66363636]
|
|
|
|
mean value: 0.8268181818181818
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.69230769 0.69230769 1. 0.61538462 0.76923077 0.61538462
|
|
0.5 1. 0.76923077 0.53333333]
|
|
|
|
mean value: 0.7187179487179487
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.69
|
|
|
|
Accuracy on Blind test: 0.85
|
|
|
|
Model_name: Extra Tree
|
|
Model func: ExtraTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01018524 0.01018262 0.01032281 0.00936413 0.01030827 0.0103271
|
|
0.00937414 0.00911331 0.00936627 0.00902772]
|
|
|
|
mean value: 0.009757161140441895
|
|
|
|
key: score_time
|
|
value: [0.00935912 0.00933194 0.00907087 0.00914145 0.00938082 0.00943184
|
|
0.00886512 0.00865507 0.00879741 0.00870585]
|
|
|
|
mean value: 0.009073948860168457
|
|
|
|
key: test_mcc
|
|
value: [0.2773501 0.54772256 0.71818182 0.52295779 0.33636364 0.35527986
|
|
0.55161872 0.52727273 0.52727273 0.13483997]
|
|
|
|
mean value: 0.4498859905944563
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.63636364 0.77272727 0.85714286 0.76190476 0.66666667 0.66666667
|
|
0.76190476 0.76190476 0.76190476 0.57142857]
|
|
|
|
mean value: 0.7218614718614719
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.66666667 0.76190476 0.85714286 0.73684211 0.66666667 0.69565217
|
|
0.73684211 0.76190476 0.76190476 0.64 ]
|
|
|
|
mean value: 0.7285526860629835
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.61538462 0.8 0.81818182 0.77777778 0.63636364 0.61538462
|
|
0.875 0.8 0.8 0.57142857]
|
|
|
|
mean value: 0.7309521034521035
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.72727273 0.72727273 0.9 0.7 0.7 0.8
|
|
0.63636364 0.72727273 0.72727273 0.72727273]
|
|
|
|
mean value: 0.7372727272727273
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.63636364 0.77272727 0.85909091 0.75909091 0.66818182 0.67272727
|
|
0.76818182 0.76363636 0.76363636 0.56363636]
|
|
|
|
mean value: 0.7227272727272727
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.5 0.61538462 0.75 0.58333333 0.5 0.53333333
|
|
0.58333333 0.61538462 0.61538462 0.47058824]
|
|
|
|
mean value: 0.5766742081447964
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.48
|
|
|
|
Accuracy on Blind test: 0.75
|
|
|
|
Model_name: Random Forest
|
|
Model func: RandomForestClassifier(n_estimators=1000, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(n_estimators=1000, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.30615759 1.29314303 1.21581531 1.25626373 1.22632051 1.20627904
|
|
1.20618749 1.20896769 1.21818185 1.19756627]
|
|
|
|
mean value: 1.2334882497787476
|
|
|
|
key: score_time
|
|
value: [0.09819245 0.09016585 0.09647751 0.09425235 0.0900619 0.08902097
|
|
0.08858657 0.08892465 0.09014058 0.09470963]
|
|
|
|
mean value: 0.09205324649810791
|
|
|
|
key: test_mcc
|
|
value: [0.73029674 0.75592895 1. 0.90909091 0.90909091 0.71818182
|
|
0.80909091 0.90909091 0.90829511 0.43007562]
|
|
|
|
mean value: 0.8079141865537268
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.86363636 0.86363636 1. 0.95238095 0.95238095 0.85714286
|
|
0.9047619 0.95238095 0.95238095 0.71428571]
|
|
|
|
mean value: 0.9012987012987013
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.86956522 0.88 1. 0.95238095 0.95238095 0.85714286
|
|
0.90909091 0.95238095 0.95652174 0.75 ]
|
|
|
|
mean value: 0.9079463579898363
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.83333333 0.78571429 1. 0.90909091 0.90909091 0.81818182
|
|
0.90909091 1. 0.91666667 0.69230769]
|
|
|
|
mean value: 0.8773476523476523
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.90909091 1. 1. 1. 1. 0.9
|
|
0.90909091 0.90909091 1. 0.81818182]
|
|
|
|
mean value: 0.9445454545454546
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.86363636 0.86363636 1. 0.95454545 0.95454545 0.85909091
|
|
0.90454545 0.95454545 0.95 0.70909091]
|
|
|
|
mean value: 0.9013636363636364
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.76923077 0.78571429 1. 0.90909091 0.90909091 0.75
|
|
0.83333333 0.90909091 0.91666667 0.6 ]
|
|
|
|
mean value: 0.8382217782217782
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.84
|
|
|
|
Accuracy on Blind test: 0.93
|
|
|
|
Model_name: Random Forest2
|
|
Model func: RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000...05', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10,
|
|
oob_score=True, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.90209532 0.89312887 0.89125085 0.87585258 0.90369177 0.9013927
|
|
0.9555521 0.85647655 0.92886138 0.84741616]
|
|
|
|
mean value: 0.8955718278884888
|
|
|
|
key: score_time
|
|
value: [0.25639296 0.21086621 0.20298195 0.16946197 0.25704312 0.1911087
|
|
0.12228298 0.24127936 0.19275093 0.20414257]
|
|
|
|
mean value: 0.20483107566833497
|
|
|
|
key: test_mcc
|
|
value: [0.83205029 0.75592895 1. 0.90909091 0.90909091 0.63305416
|
|
0.71818182 0.90909091 0.82275335 0.43007562]
|
|
|
|
mean value: 0.7919316917369833
|
|
|
|
key: train_mcc
|
|
value: [0.95874497 0.96890428 0.9690588 0.95894679 0.9690588 0.9690588
|
|
0.95896444 0.96906883 0.95896444 0.97927405]
|
|
|
|
mean value: 0.9660044199886203
|
|
|
|
key: test_accuracy
|
|
value: [0.90909091 0.86363636 1. 0.95238095 0.95238095 0.80952381
|
|
0.85714286 0.95238095 0.9047619 0.71428571]
|
|
|
|
mean value: 0.8915584415584416
|
|
|
|
key: train_accuracy
|
|
value: [0.97894737 0.98421053 0.98429319 0.97905759 0.98429319 0.98429319
|
|
0.97905759 0.98429319 0.97905759 0.9895288 ]
|
|
|
|
mean value: 0.9827032240286581
|
|
|
|
key: test_fscore
|
|
value: [0.91666667 0.88 1. 0.95238095 0.95238095 0.81818182
|
|
0.85714286 0.95238095 0.91666667 0.75 ]
|
|
|
|
mean value: 0.8995800865800866
|
|
|
|
key: train_fscore
|
|
value: [0.97938144 0.98445596 0.98461538 0.97959184 0.98461538 0.98461538
|
|
0.97938144 0.98445596 0.97938144 0.98958333]
|
|
|
|
mean value: 0.9830077570909534
|
|
|
|
key: test_precision
|
|
value: [0.84615385 0.78571429 1. 0.90909091 0.90909091 0.75
|
|
0.9 1. 0.84615385 0.69230769]
|
|
|
|
mean value: 0.8638511488511489
|
|
|
|
key: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
train_precision
|
|
value: [0.95959596 0.96938776 0.96969697 0.96 0.96969697 0.96969697
|
|
0.95959596 0.96938776 0.95959596 0.97938144]
|
|
|
|
mean value: 0.9666035741381839
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 1. 0.9
|
|
0.81818182 0.90909091 1. 0.81818182]
|
|
|
|
mean value: 0.9445454545454546
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.90909091 0.86363636 1. 0.95454545 0.95454545 0.81363636
|
|
0.85909091 0.95454545 0.9 0.70909091]
|
|
|
|
mean value: 0.8918181818181818
|
|
|
|
key: train_roc_auc
|
|
value: [0.97894737 0.98421053 0.98421053 0.97894737 0.98421053 0.98421053
|
|
0.97916667 0.984375 0.97916667 0.98958333]
|
|
|
|
mean value: 0.982702850877193
|
|
|
|
key: test_jcc
|
|
value: [0.84615385 0.78571429 1. 0.90909091 0.90909091 0.69230769
|
|
0.75 0.90909091 0.84615385 0.6 ]
|
|
|
|
mean value: 0.8247602397602397
|
|
|
|
key: train_jcc
|
|
value: [0.95959596 0.96938776 0.96969697 0.96 0.96969697 0.96969697
|
|
0.95959596 0.96938776 0.95959596 0.97938144]
|
|
|
|
mean value: 0.9666035741381839
|
|
|
|
MCC on Blind test: 0.87
|
|
|
|
Accuracy on Blind test: 0.94
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.02339101 0.00911975 0.01014471 0.00981402 0.00911069 0.0097158
|
|
0.0100708 0.00990605 0.00931025 0.01011109]
|
|
|
|
mean value: 0.011069416999816895
|
|
|
|
key: score_time
|
|
value: [0.01295066 0.0090971 0.00953102 0.00900054 0.00888133 0.01002598
|
|
0.00940371 0.00870204 0.00876236 0.00936818]
|
|
|
|
mean value: 0.009572291374206543
|
|
|
|
key: test_mcc
|
|
value: [0.63636364 0.45454545 0.61818182 0.33636364 0.63305416 0.71562645
|
|
0.55161872 0.4719399 0.23373675 0.43007562]
|
|
|
|
mean value: 0.5081506148469601
|
|
|
|
key: train_mcc
|
|
value: [0.70920321 0.72063664 0.67566396 0.73823885 0.6859713 0.6859713
|
|
0.69638158 0.69638158 0.62349105 0.7403031 ]
|
|
|
|
mean value: 0.697224255471689
|
|
|
|
key: test_accuracy
|
|
value: [0.81818182 0.72727273 0.80952381 0.66666667 0.80952381 0.85714286
|
|
0.76190476 0.71428571 0.61904762 0.71428571]
|
|
|
|
mean value: 0.7497835497835498
|
|
|
|
key: train_accuracy
|
|
value: [0.85263158 0.85789474 0.83769634 0.86910995 0.84293194 0.84293194
|
|
0.84816754 0.84816754 0.81151832 0.86910995]
|
|
|
|
mean value: 0.8480159823642877
|
|
|
|
key: test_fscore
|
|
value: [0.81818182 0.72727273 0.8 0.66666667 0.81818182 0.84210526
|
|
0.73684211 0.66666667 0.66666667 0.75 ]
|
|
|
|
mean value: 0.7492583732057416
|
|
|
|
key: train_fscore
|
|
value: [0.86 0.86567164 0.84102564 0.87046632 0.84536082 0.84536082
|
|
0.84816754 0.84816754 0.80645161 0.87309645]
|
|
|
|
mean value: 0.850376839168251
|
|
|
|
key: test_precision
|
|
value: [0.81818182 0.72727273 0.8 0.63636364 0.75 0.88888889
|
|
0.875 0.85714286 0.61538462 0.69230769]
|
|
|
|
mean value: 0.7660542235542236
|
|
|
|
key: train_precision
|
|
value: [0.81904762 0.82075472 0.82828283 0.86597938 0.83673469 0.83673469
|
|
0.84375 0.84375 0.82417582 0.84313725]
|
|
|
|
mean value: 0.8362347012587765
|
|
|
|
key: test_recall
|
|
value: [0.81818182 0.72727273 0.8 0.7 0.9 0.8
|
|
0.63636364 0.54545455 0.72727273 0.81818182]
|
|
|
|
mean value: 0.7472727272727273
|
|
|
|
key: train_recall
|
|
value: [0.90526316 0.91578947 0.85416667 0.875 0.85416667 0.85416667
|
|
0.85263158 0.85263158 0.78947368 0.90526316]
|
|
|
|
mean value: 0.8658552631578947
|
|
|
|
key: test_roc_auc
|
|
value: [0.81818182 0.72727273 0.80909091 0.66818182 0.81363636 0.85454545
|
|
0.76818182 0.72272727 0.61363636 0.70909091]
|
|
|
|
mean value: 0.7504545454545455
|
|
|
|
key: train_roc_auc
|
|
value: [0.85263158 0.85789474 0.83760965 0.86907895 0.84287281 0.84287281
|
|
0.84819079 0.84819079 0.81140351 0.86929825]
|
|
|
|
mean value: 0.8480043859649123
|
|
|
|
key: test_jcc
|
|
value: [0.69230769 0.57142857 0.66666667 0.5 0.69230769 0.72727273
|
|
0.58333333 0.5 0.5 0.6 ]
|
|
|
|
mean value: 0.6033316683316683
|
|
|
|
key: train_jcc
|
|
value: [0.75438596 0.76315789 0.72566372 0.7706422 0.73214286 0.73214286
|
|
0.73636364 0.73636364 0.67567568 0.77477477]
|
|
|
|
mean value: 0.7401313215761582
|
|
|
|
MCC on Blind test: 0.55
|
|
|
|
Accuracy on Blind test: 0.78
|
|
|
|
Model_name: XGBoost
|
|
Model func: XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000...
|
|
interaction_constraints=None, learning_rate=None,
|
|
max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan,
|
|
monotone_constraints=None, n_estimators=100,
|
|
n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None,
|
|
reg_lambda=None, scale_pos_weight=None,
|
|
subsample=None, tree_method=None,
|
|
use_label_encoder=False,
|
|
validate_parameters=None, verbosity=0))])
|
|
|
|
key: fit_time
|
|
value: [0.08275819 0.05508804 0.04714227 0.05396843 0.06033301 0.0590179
|
|
0.05804062 0.05956721 0.05484629 0.0539155 ]
|
|
|
|
mean value: 0.058467745780944824
|
|
|
|
key: score_time
|
|
value: [0.01187897 0.01033783 0.0107851 0.01045775 0.01141596 0.01055002
|
|
0.01059866 0.01087523 0.01029396 0.01083279]
|
|
|
|
mean value: 0.010802626609802246
|
|
|
|
key: test_mcc
|
|
value: [1. 0.73029674 0.90909091 1. 0.90909091 0.90909091
|
|
0.90829511 0.90909091 0.90829511 0.80909091]
|
|
|
|
mean value: 0.8992341501253261
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [1. 0.86363636 0.95238095 1. 0.95238095 0.95238095
|
|
0.95238095 0.95238095 0.95238095 0.9047619 ]
|
|
|
|
mean value: 0.9482683982683983
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [1. 0.86956522 0.95238095 1. 0.95238095 0.95238095
|
|
0.95652174 0.95238095 0.95652174 0.90909091]
|
|
|
|
mean value: 0.9501223414266893
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 0.83333333 0.90909091 1. 0.90909091 0.90909091
|
|
0.91666667 1. 0.91666667 0.90909091]
|
|
|
|
mean value: 0.9303030303030303
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 0.90909091 1. 1. 1. 1.
|
|
1. 0.90909091 1. 0.90909091]
|
|
|
|
mean value: 0.9727272727272727
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [1. 0.86363636 0.95454545 1. 0.95454545 0.95454545
|
|
0.95 0.95454545 0.95 0.90454545]
|
|
|
|
mean value: 0.9486363636363636
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [1. 0.76923077 0.90909091 1. 0.90909091 0.90909091
|
|
0.91666667 0.90909091 0.91666667 0.83333333]
|
|
|
|
mean value: 0.9072261072261072
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.89
|
|
|
|
Accuracy on Blind test: 0.95
|
|
|
|
Model_name: LDA
|
|
Model func: LinearDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LinearDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.03119111 0.06622481 0.02681947 0.0669136 0.05784321 0.0625658
|
|
0.05946803 0.02557802 0.05062222 0.03925514]
|
|
|
|
mean value: 0.04864814281463623
|
|
|
|
key: score_time
|
|
value: [0.02157354 0.01243377 0.01237917 0.02732563 0.02043533 0.02117467
|
|
0.0124836 0.01243639 0.02221417 0.01236629]
|
|
|
|
mean value: 0.01748225688934326
|
|
|
|
key: test_mcc
|
|
value: [ 0.73029674 0.73029674 0.42727273 0.52295779 0.80909091 0.71562645
|
|
0.63305416 0.39196475 0.52727273 -0.05504819]
|
|
|
|
mean value: 0.5432784810726394
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 0.9895822
|
|
0.97927405 0.98958333 1. 1. ]
|
|
|
|
mean value: 0.9958439579407851
|
|
|
|
key: test_accuracy
|
|
value: [0.86363636 0.86363636 0.71428571 0.76190476 0.9047619 0.85714286
|
|
0.80952381 0.66666667 0.76190476 0.47619048]
|
|
|
|
mean value: 0.767965367965368
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 0.9947644 0.9895288
|
|
0.9947644 1. 1. ]
|
|
|
|
mean value: 0.9979057591623037
|
|
|
|
key: test_fscore
|
|
value: [0.86956522 0.86956522 0.7 0.73684211 0.9 0.84210526
|
|
0.8 0.58823529 0.76190476 0.52173913]
|
|
|
|
mean value: 0.7589956989660853
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 0.99481865
|
|
0.98958333 0.9947644 1. 1. ]
|
|
|
|
mean value: 0.9979166384088833
|
|
|
|
key: test_precision
|
|
value: [0.83333333 0.83333333 0.7 0.77777778 0.9 0.88888889
|
|
0.88888889 0.83333333 0.8 0.5 ]
|
|
|
|
mean value: 0.7955555555555556
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 0.98969072
|
|
0.97938144 0.98958333 1. 1. ]
|
|
|
|
mean value: 0.9958655498281787
|
|
|
|
key: test_recall
|
|
value: [0.90909091 0.90909091 0.7 0.7 0.9 0.8
|
|
0.72727273 0.45454545 0.72727273 0.54545455]
|
|
|
|
mean value: 0.7372727272727273
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.86363636 0.86363636 0.71363636 0.75909091 0.90454545 0.85454545
|
|
0.81363636 0.67727273 0.76363636 0.47272727]
|
|
|
|
mean value: 0.7686363636363636
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 0.99473684
|
|
0.98958333 0.99479167 1. 1. ]
|
|
|
|
mean value: 0.9979111842105263
|
|
|
|
key: test_jcc
|
|
value: [0.76923077 0.76923077 0.53846154 0.58333333 0.81818182 0.72727273
|
|
0.66666667 0.41666667 0.61538462 0.35294118]
|
|
|
|
mean value: 0.6257370080899493
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 0.98969072
|
|
0.97938144 0.98958333 1. 1. ]
|
|
|
|
mean value: 0.9958655498281787
|
|
|
|
MCC on Blind test: 0.7
|
|
|
|
Accuracy on Blind test: 0.86
|
|
|
|
Model_name: Multinomial
|
|
Model func: MultinomialNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MultinomialNB())])
|
|
|
|
key: fit_time
|
|
value: [0.0188179 0.01023531 0.01005864 0.00986576 0.00988483 0.01002455
|
|
0.0098505 0.01014495 0.00988603 0.00996375]
|
|
|
|
mean value: 0.010873222351074218
|
|
|
|
key: score_time
|
|
value: [0.01108694 0.00969553 0.00944519 0.00939345 0.00933337 0.00927711
|
|
0.00928974 0.00942636 0.00939441 0.0093236 ]
|
|
|
|
mean value: 0.009566569328308105
|
|
|
|
key: test_mcc
|
|
value: [0.54772256 0.46225016 0.90909091 0.4719399 0.82572282 0.52295779
|
|
0.44038551 0.90909091 0.52727273 0.13762047]
|
|
|
|
mean value: 0.5754053759176136
|
|
|
|
key: train_mcc
|
|
value: [0.6344324 0.67824625 0.62776058 0.7105481 0.6460861 0.66711224
|
|
0.6548207 0.63650874 0.61435486 0.69958718]
|
|
|
|
mean value: 0.6569457148583459
|
|
|
|
key: test_accuracy
|
|
value: [0.77272727 0.72727273 0.95238095 0.71428571 0.9047619 0.76190476
|
|
0.71428571 0.95238095 0.76190476 0.57142857]
|
|
|
|
mean value: 0.7833333333333333
|
|
|
|
key: train_accuracy
|
|
value: [0.81578947 0.83684211 0.81151832 0.85340314 0.82198953 0.83246073
|
|
0.82722513 0.81675393 0.80628272 0.84816754]
|
|
|
|
mean value: 0.8270432626067787
|
|
|
|
key: test_fscore
|
|
value: [0.7826087 0.75 0.95238095 0.75 0.90909091 0.73684211
|
|
0.7 0.95238095 0.76190476 0.60869565]
|
|
|
|
mean value: 0.7903904028846821
|
|
|
|
key: train_fscore
|
|
value: [0.8241206 0.84577114 0.82352941 0.86138614 0.83 0.84
|
|
0.82901554 0.8241206 0.81218274 0.85427136]
|
|
|
|
mean value: 0.8344397542629447
|
|
|
|
key: test_precision
|
|
value: [0.75 0.69230769 0.90909091 0.64285714 0.83333333 0.77777778
|
|
0.77777778 1. 0.8 0.58333333]
|
|
|
|
mean value: 0.7766477966477967
|
|
|
|
key: train_precision
|
|
value: [0.78846154 0.80188679 0.77777778 0.82075472 0.79807692 0.80769231
|
|
0.81632653 0.78846154 0.78431373 0.81730769]
|
|
|
|
mean value: 0.8001059543314181
|
|
|
|
key: test_recall
|
|
value: [0.81818182 0.81818182 1. 0.9 1. 0.7
|
|
0.63636364 0.90909091 0.72727273 0.63636364]
|
|
|
|
mean value: 0.8145454545454546
|
|
|
|
key: train_recall
|
|
value: [0.86315789 0.89473684 0.875 0.90625 0.86458333 0.875
|
|
0.84210526 0.86315789 0.84210526 0.89473684]
|
|
|
|
mean value: 0.8720833333333333
|
|
|
|
key: test_roc_auc
|
|
value: [0.77272727 0.72727273 0.95454545 0.72272727 0.90909091 0.75909091
|
|
0.71818182 0.95454545 0.76363636 0.56818182]
|
|
|
|
mean value: 0.785
|
|
|
|
key: train_roc_auc
|
|
value: [0.81578947 0.83684211 0.81118421 0.853125 0.82176535 0.83223684
|
|
0.82730263 0.81699561 0.8064693 0.84841009]
|
|
|
|
mean value: 0.8270120614035088
|
|
|
|
key: test_jcc
|
|
value: [0.64285714 0.6 0.90909091 0.6 0.83333333 0.58333333
|
|
0.53846154 0.90909091 0.61538462 0.4375 ]
|
|
|
|
mean value: 0.6669051781551781
|
|
|
|
key: train_jcc
|
|
value: [0.7008547 0.73275862 0.7 0.75652174 0.70940171 0.72413793
|
|
0.7079646 0.7008547 0.68376068 0.74561404]
|
|
|
|
mean value: 0.7161868722583998
|
|
|
|
MCC on Blind test: 0.61
|
|
|
|
Accuracy on Blind test: 0.82
|
|
|
|
Model_name: Passive Aggresive
|
|
Model func: PassiveAggressiveClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
PassiveAggressiveClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01093888 0.01521778 0.01604748 0.0163331 0.0161612 0.01637101
|
|
0.01448059 0.01658916 0.01551127 0.01330352]
|
|
|
|
mean value: 0.015095400810241699
|
|
|
|
key: score_time
|
|
value: [0.00858784 0.01158476 0.01148224 0.01158595 0.01156044 0.01155162
|
|
0.01152015 0.01150441 0.01155853 0.01151228]
|
|
|
|
mean value: 0.011244821548461913
|
|
|
|
key: test_mcc
|
|
value: [0.73029674 0.63636364 0.90829511 0.71818182 0.74795759 0.63305416
|
|
0.71562645 0.46249729 0.66332496 0.45226702]
|
|
|
|
mean value: 0.6667864773402087
|
|
|
|
key: train_mcc
|
|
value: [0.88073886 0.92884073 0.95831967 0.95831967 0.92917291 0.94893045
|
|
0.91949402 0.94893045 0.83546973 0.73122789]
|
|
|
|
mean value: 0.9039444374117305
|
|
|
|
key: test_accuracy
|
|
value: [0.86363636 0.81818182 0.95238095 0.85714286 0.85714286 0.80952381
|
|
0.85714286 0.66666667 0.80952381 0.71428571]
|
|
|
|
mean value: 0.8205627705627705
|
|
|
|
key: train_accuracy
|
|
value: [0.93684211 0.96315789 0.97905759 0.97905759 0.96335079 0.97382199
|
|
0.95811518 0.97382199 0.91099476 0.84816754]
|
|
|
|
mean value: 0.9486387434554974
|
|
|
|
key: test_fscore
|
|
value: [0.85714286 0.81818182 0.94736842 0.85714286 0.86956522 0.81818182
|
|
0.86956522 0.53333333 0.84615385 0.76923077]
|
|
|
|
mean value: 0.818586615520254
|
|
|
|
key: train_fscore
|
|
value: [0.93258427 0.96174863 0.97938144 0.97938144 0.96482412 0.97461929
|
|
0.95959596 0.97297297 0.9178744 0.86757991]
|
|
|
|
mean value: 0.9510562437463755
|
|
|
|
key: test_precision
|
|
value: [0.9 0.81818182 1. 0.81818182 0.76923077 0.75
|
|
0.83333333 1. 0.73333333 0.66666667]
|
|
|
|
mean value: 0.8288927738927739
|
|
|
|
key: train_precision
|
|
value: [1. 1. 0.96938776 0.96938776 0.93203883 0.95049505
|
|
0.9223301 1. 0.84821429 0.76612903]
|
|
|
|
mean value: 0.9357982809720218
|
|
|
|
key: test_recall
|
|
value: [0.81818182 0.81818182 0.9 0.9 1. 0.9
|
|
0.90909091 0.36363636 1. 0.90909091]
|
|
|
|
mean value: 0.8518181818181818
|
|
|
|
key: train_recall
|
|
value: [0.87368421 0.92631579 0.98958333 0.98958333 1. 1.
|
|
1. 0.94736842 1. 1. ]
|
|
|
|
mean value: 0.9726535087719298
|
|
|
|
key: test_roc_auc
|
|
value: [0.86363636 0.81818182 0.95 0.85909091 0.86363636 0.81363636
|
|
0.85454545 0.68181818 0.8 0.70454545]
|
|
|
|
mean value: 0.8209090909090909
|
|
|
|
key: train_roc_auc
|
|
value: [0.93684211 0.96315789 0.97900219 0.97900219 0.96315789 0.97368421
|
|
0.95833333 0.97368421 0.91145833 0.84895833]
|
|
|
|
mean value: 0.9487280701754386
|
|
|
|
key: test_jcc
|
|
value: [0.75 0.69230769 0.9 0.75 0.76923077 0.69230769
|
|
0.76923077 0.36363636 0.73333333 0.625 ]
|
|
|
|
mean value: 0.704504662004662
|
|
|
|
key: train_jcc
|
|
value: [0.87368421 0.92631579 0.95959596 0.95959596 0.93203883 0.95049505
|
|
0.9223301 0.94736842 0.84821429 0.76612903]
|
|
|
|
mean value: 0.9085767639760687
|
|
|
|
MCC on Blind test: 0.47
|
|
|
|
Accuracy on Blind test: 0.68
|
|
|
|
Model_name: Stochastic GDescent
|
|
Model func: SGDClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SGDClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01468706 0.01549625 0.01435971 0.01410246 0.01430082 0.01447487
|
|
0.01353121 0.01427817 0.01515913 0.01476002]
|
|
|
|
mean value: 0.014514970779418945
|
|
|
|
key: score_time
|
|
value: [0.01159954 0.0116024 0.01148248 0.01152968 0.01152968 0.01156878
|
|
0.01153922 0.01157212 0.01158714 0.01147294]
|
|
|
|
mean value: 0.011548399925231934
|
|
|
|
key: test_mcc
|
|
value: [0.68313005 0.56694671 0.82275335 0.82275335 0.67419986 0.52727273
|
|
0.71562645 0.46249729 0.80909091 0.33709993]
|
|
|
|
mean value: 0.6421370630446213
|
|
|
|
key: train_mcc
|
|
value: [0.65465367 0.74657689 0.91643821 0.82648365 0.74743893 0.92674636
|
|
0.92922547 0.75599417 0.9690588 0.85363527]
|
|
|
|
mean value: 0.8326251416033537
|
|
|
|
key: test_accuracy
|
|
value: [0.81818182 0.77272727 0.9047619 0.9047619 0.80952381 0.76190476
|
|
0.85714286 0.66666667 0.9047619 0.66666667]
|
|
|
|
mean value: 0.8067099567099567
|
|
|
|
key: train_accuracy
|
|
value: [0.8 0.85789474 0.95811518 0.90575916 0.85863874 0.96335079
|
|
0.96335079 0.86387435 0.98429319 0.92146597]
|
|
|
|
mean value: 0.9076742904381372
|
|
|
|
key: test_fscore
|
|
value: [0.77777778 0.73684211 0.88888889 0.88888889 0.83333333 0.76190476
|
|
0.86956522 0.53333333 0.90909091 0.72 ]
|
|
|
|
mean value: 0.7919625215872356
|
|
|
|
key: train_fscore
|
|
value: [0.75 0.83435583 0.95789474 0.89655172 0.87671233 0.96373057
|
|
0.96446701 0.84146341 0.98395722 0.92682927]
|
|
|
|
mean value: 0.8995962095170513
|
|
|
|
key: test_precision
|
|
value: [1. 0.875 1. 1. 0.71428571 0.72727273
|
|
0.83333333 1. 0.90909091 0.64285714]
|
|
|
|
mean value: 0.8701839826839827
|
|
|
|
key: train_precision
|
|
value: [1. 1. 0.96808511 1. 0.7804878 0.95876289
|
|
0.93137255 1. 1. 0.86363636]
|
|
|
|
mean value: 0.9502344710514937
|
|
|
|
key: test_recall
|
|
value: [0.63636364 0.63636364 0.8 0.8 1. 0.8
|
|
0.90909091 0.36363636 0.90909091 0.81818182]
|
|
|
|
mean value: 0.7672727272727273
|
|
|
|
key: train_recall
|
|
value: [0.6 0.71578947 0.94791667 0.8125 1. 0.96875
|
|
1. 0.72631579 0.96842105 1. ]
|
|
|
|
mean value: 0.873969298245614
|
|
|
|
key: test_roc_auc
|
|
value: [0.81818182 0.77272727 0.9 0.9 0.81818182 0.76363636
|
|
0.85454545 0.68181818 0.90454545 0.65909091]
|
|
|
|
mean value: 0.8072727272727273
|
|
|
|
key: train_roc_auc
|
|
value: [0.8 0.85789474 0.95816886 0.90625 0.85789474 0.96332237
|
|
0.96354167 0.86315789 0.98421053 0.921875 ]
|
|
|
|
mean value: 0.9076315789473685
|
|
|
|
key: test_jcc
|
|
value: [0.63636364 0.58333333 0.8 0.8 0.71428571 0.61538462
|
|
0.76923077 0.36363636 0.83333333 0.5625 ]
|
|
|
|
mean value: 0.6678067765567766
|
|
|
|
key: train_jcc
|
|
value: [0.6 0.71578947 0.91919192 0.8125 0.7804878 0.93
|
|
0.93137255 0.72631579 0.96842105 0.86363636]
|
|
|
|
mean value: 0.8247714952515414
|
|
|
|
MCC on Blind test: 0.73
|
|
|
|
Accuracy on Blind test: 0.88
|
|
|
|
Model_name: AdaBoost Classifier
|
|
Model func: AdaBoostClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', AdaBoostClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.12453985 0.11010838 0.11050749 0.11169887 0.11265731 0.11060405
|
|
0.11088371 0.10958815 0.11039591 0.11021256]
|
|
|
|
mean value: 0.11211962699890136
|
|
|
|
key: score_time
|
|
value: [0.01472282 0.01498485 0.01611757 0.01495147 0.01522636 0.01489305
|
|
0.0149107 0.01497507 0.01493835 0.01501346]
|
|
|
|
mean value: 0.015073370933532716
|
|
|
|
key: test_mcc
|
|
value: [1. 0.83205029 0.90909091 0.82275335 0.90909091 0.90909091
|
|
0.90829511 1. 0.90829511 0.90829511]
|
|
|
|
mean value: 0.9106961691505756
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [1. 0.90909091 0.95238095 0.9047619 0.95238095 0.95238095
|
|
0.95238095 1. 0.95238095 0.95238095]
|
|
|
|
mean value: 0.9528138528138528
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [1. 0.91666667 0.95238095 0.88888889 0.95238095 0.95238095
|
|
0.95652174 1. 0.95652174 0.95652174]
|
|
|
|
mean value: 0.9532263630089717
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 0.84615385 0.90909091 1. 0.90909091 0.90909091
|
|
0.91666667 1. 0.91666667 0.91666667]
|
|
|
|
mean value: 0.9323426573426573
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 0.8 1. 1. 1. 1. 1. 1. ]
|
|
|
|
mean value: 0.98
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [1. 0.90909091 0.95454545 0.9 0.95454545 0.95454545
|
|
0.95 1. 0.95 0.95 ]
|
|
|
|
mean value: 0.9522727272727273
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [1. 0.84615385 0.90909091 0.8 0.90909091 0.90909091
|
|
0.91666667 1. 0.91666667 0.91666667]
|
|
|
|
mean value: 0.9123426573426573
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.9
|
|
|
|
Accuracy on Blind test: 0.95
|
|
|
|
Model_name: Bagging Classifier
|
|
Model func: BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
BaggingClassifier(n_jobs=10, oob_score=True,
|
|
random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.03981161 0.04514956 0.04036641 0.04546928 0.03787112 0.03224587
|
|
0.04925346 0.03796482 0.03647232 0.0328362 ]
|
|
|
|
mean value: 0.03974406719207764
|
|
|
|
key: score_time
|
|
value: [0.01733565 0.03619933 0.03100276 0.02404404 0.01759744 0.03142309
|
|
0.02289605 0.02225947 0.02215791 0.02506065]
|
|
|
|
mean value: 0.024997639656066894
|
|
|
|
key: test_mcc
|
|
value: [1. 0.83205029 0.80909091 1. 0.90909091 1.
|
|
0.90829511 0.90909091 0.80909091 0.80909091]
|
|
|
|
mean value: 0.8985799946021636
|
|
|
|
key: train_mcc
|
|
value: [0.98952851 0.98952851 1. 0.9895822 1. 0.9895822
|
|
1. 0.98958333 1. 1. ]
|
|
|
|
mean value: 0.9947804741799376
|
|
|
|
key: test_accuracy
|
|
value: [1. 0.90909091 0.9047619 1. 0.95238095 1.
|
|
0.95238095 0.95238095 0.9047619 0.9047619 ]
|
|
|
|
mean value: 0.948051948051948
|
|
|
|
key: train_accuracy
|
|
value: [0.99473684 0.99473684 1. 0.9947644 1. 0.9947644
|
|
1. 0.9947644 1. 1. ]
|
|
|
|
mean value: 0.9973766877927804
|
|
|
|
key: test_fscore
|
|
value: [1. 0.91666667 0.9 1. 0.95238095 1.
|
|
0.95652174 0.95238095 0.90909091 0.90909091]
|
|
|
|
mean value: 0.9496132128740824
|
|
|
|
key: train_fscore
|
|
value: [0.9947644 0.9947644 1. 0.99481865 1. 0.99481865
|
|
1. 0.9947644 1. 1. ]
|
|
|
|
mean value: 0.9973930499416759
|
|
|
|
key: test_precision
|
|
value: [1. 0.84615385 0.9 1. 0.90909091 1.
|
|
0.91666667 1. 0.90909091 0.90909091]
|
|
|
|
mean value: 0.939009324009324
|
|
|
|
key: train_precision
|
|
value: [0.98958333 0.98958333 1. 0.98969072 1. 0.98969072
|
|
1. 0.98958333 1. 1. ]
|
|
|
|
mean value: 0.9948131443298969
|
|
|
|
key: test_recall
|
|
value: [1. 1. 0.9 1. 1. 1.
|
|
1. 0.90909091 0.90909091 0.90909091]
|
|
|
|
mean value: 0.9627272727272728
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [1. 0.90909091 0.90454545 1. 0.95454545 1.
|
|
0.95 0.95454545 0.90454545 0.90454545]
|
|
|
|
mean value: 0.9481818181818181
|
|
|
|
key: train_roc_auc
|
|
value: [0.99473684 0.99473684 1. 0.99473684 1. 0.99473684
|
|
1. 0.99479167 1. 1. ]
|
|
|
|
mean value: 0.997373903508772
|
|
|
|
key: test_jcc
|
|
value: [1. 0.84615385 0.81818182 1. 0.90909091 1.
|
|
0.91666667 0.90909091 0.83333333 0.83333333]
|
|
|
|
mean value: 0.9065850815850816
|
|
|
|
key: train_jcc
|
|
value: [0.98958333 0.98958333 1. 0.98969072 1. 0.98969072
|
|
1. 0.98958333 1. 1. ]
|
|
|
|
mean value: 0.9948131443298969
|
|
|
|
MCC on Blind test: 0.93
|
|
|
|
Accuracy on Blind test: 0.97
|
|
|
|
Model_name: Gaussian Process
|
|
Model func: GaussianProcessClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianProcessClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.05713534 0.08317208 0.0825665 0.08128595 0.05665421 0.06311941
|
|
0.07721305 0.05506492 0.0598321 0.06101608]
|
|
|
|
mean value: 0.06770596504211426
|
|
|
|
key: score_time
|
|
value: [0.02309966 0.02279806 0.02322483 0.02260709 0.01263595 0.02153444
|
|
0.02136874 0.02311778 0.0216713 0.02307773]
|
|
|
|
mean value: 0.02151355743408203
|
|
|
|
key: test_mcc
|
|
value: [0.46225016 0.36514837 0.90909091 0.43007562 0.82572282 0.42727273
|
|
0.06741999 0.53300179 0.82275335 0.13762047]
|
|
|
|
mean value: 0.49803562097870197
|
|
|
|
key: train_mcc
|
|
value: [0.98952851 1. 1. 1. 1. 0.98958333
|
|
1. 1. 0.9895822 0.9895822 ]
|
|
|
|
mean value: 0.9958276234546217
|
|
|
|
key: test_accuracy
|
|
value: [0.72727273 0.68181818 0.95238095 0.71428571 0.9047619 0.71428571
|
|
0.52380952 0.71428571 0.9047619 0.57142857]
|
|
|
|
mean value: 0.740909090909091
|
|
|
|
key: train_accuracy
|
|
value: [0.99473684 1. 1. 1. 1. 0.9947644
|
|
1. 1. 0.9947644 0.9947644 ]
|
|
|
|
mean value: 0.997903003582254
|
|
|
|
key: test_fscore
|
|
value: [0.7 0.69565217 0.95238095 0.66666667 0.90909091 0.7
|
|
0.44444444 0.625 0.91666667 0.60869565]
|
|
|
|
mean value: 0.7218597465336596
|
|
|
|
key: train_fscore
|
|
value: [0.9947644 1. 1. 1. 1. 0.9947644
|
|
1. 1. 0.99470899 0.99470899]
|
|
|
|
mean value: 0.9978946785229508
|
|
|
|
key: test_precision
|
|
value: [0.77777778 0.66666667 0.90909091 0.75 0.83333333 0.7
|
|
0.57142857 1. 0.84615385 0.58333333]
|
|
|
|
mean value: 0.7637784437784437
|
|
|
|
key: train_precision
|
|
value: [0.98958333 1. 1. 1. 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9989583333333334
|
|
|
|
key: test_recall
|
|
value: [0.63636364 0.72727273 1. 0.6 1. 0.7
|
|
0.36363636 0.45454545 1. 0.63636364]
|
|
|
|
mean value: 0.7118181818181818
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 0.98958333
|
|
1. 1. 0.98947368 0.98947368]
|
|
|
|
mean value: 0.9968530701754386
|
|
|
|
key: test_roc_auc
|
|
value: [0.72727273 0.68181818 0.95454545 0.70909091 0.90909091 0.71363636
|
|
0.53181818 0.72727273 0.9 0.56818182]
|
|
|
|
mean value: 0.7422727272727272
|
|
|
|
key: train_roc_auc
|
|
value: [0.99473684 1. 1. 1. 1. 0.99479167
|
|
1. 1. 0.99473684 0.99473684]
|
|
|
|
mean value: 0.9979002192982456
|
|
|
|
key: test_jcc
|
|
value: [0.53846154 0.53333333 0.90909091 0.5 0.83333333 0.53846154
|
|
0.28571429 0.45454545 0.84615385 0.4375 ]
|
|
|
|
mean value: 0.5876594239094239
|
|
|
|
key: train_jcc
|
|
value: [0.98958333 1. 1. 1. 1. 0.98958333
|
|
1. 1. 0.98947368 0.98947368]
|
|
|
|
mean value: 0.995811403508772
|
|
|
|
MCC on Blind test: 0.43
|
|
|
|
Accuracy on Blind test: 0.72
|
|
|
|
Model_name: Gradient Boosting
|
|
Model func: GradientBoostingClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GradientBoostingClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.35024142 0.32633758 0.34231949 0.32462835 0.33578348 0.33862019
|
|
0.33136415 0.35129523 0.33722878 0.32974505]
|
|
|
|
mean value: 0.33675637245178225
|
|
|
|
key: score_time
|
|
value: [0.00945258 0.00955868 0.0095489 0.00915074 0.0095396 0.00941062
|
|
0.00953364 0.01018429 0.00923729 0.00993609]
|
|
|
|
mean value: 0.009555244445800781
|
|
|
|
key: test_mcc
|
|
value: [1. 0.73029674 0.90909091 0.90829511 0.90909091 0.90909091
|
|
0.90829511 0.90909091 0.90829511 0.43007562]
|
|
|
|
mean value: 0.852162131379549
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [1. 0.86363636 0.95238095 0.95238095 0.95238095 0.95238095
|
|
0.95238095 0.95238095 0.95238095 0.71428571]
|
|
|
|
mean value: 0.9244588744588744
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [1. 0.86956522 0.95238095 0.94736842 0.95238095 0.95238095
|
|
0.95652174 0.95238095 0.95652174 0.75 ]
|
|
|
|
mean value: 0.9289500926228615
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 0.83333333 0.90909091 1. 0.90909091 0.90909091
|
|
0.91666667 1. 0.91666667 0.69230769]
|
|
|
|
mean value: 0.9086247086247086
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 0.90909091 1. 0.9 1. 1.
|
|
1. 0.90909091 1. 0.81818182]
|
|
|
|
mean value: 0.9536363636363636
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [1. 0.86363636 0.95454545 0.95 0.95454545 0.95454545
|
|
0.95 0.95454545 0.95 0.70909091]
|
|
|
|
mean value: 0.9240909090909091
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [1. 0.76923077 0.90909091 0.9 0.90909091 0.90909091
|
|
0.91666667 0.90909091 0.91666667 0.6 ]
|
|
|
|
mean value: 0.8738927738927739
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.86
|
|
|
|
Accuracy on Blind test: 0.94
|
|
|
|
Model_name: QDA
|
|
Model func: QuadraticDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', QuadraticDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.02344608 0.02274013 0.02399993 0.0233686 0.02374434 0.02332354
|
|
0.02358508 0.02412319 0.02367616 0.02417731]
|
|
|
|
mean value: 0.023618435859680174
|
|
|
|
key: score_time
|
|
value: [0.01234913 0.01238394 0.01759195 0.01474524 0.01687384 0.01484942
|
|
0.01608396 0.02134705 0.01809335 0.01308537]
|
|
|
|
mean value: 0.015740323066711425
|
|
|
|
key: test_mcc
|
|
value: [0.27272727 0. 0.33028913 0.05504819 0.44038551 0.33028913
|
|
0.15894099 0.23373675 0.43007562 0.15894099]
|
|
|
|
mean value: 0.24104335656188314
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.63636364 0.5 0.66666667 0.52380952 0.71428571 0.66666667
|
|
0.57142857 0.61904762 0.71428571 0.57142857]
|
|
|
|
mean value: 0.6183982683982684
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.63636364 0.52173913 0.63157895 0.54545455 0.72727273 0.63157895
|
|
0.52631579 0.66666667 0.75 0.52631579]
|
|
|
|
mean value: 0.6163286179876569
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.63636364 0.5 0.66666667 0.5 0.66666667 0.66666667
|
|
0.625 0.61538462 0.69230769 0.625 ]
|
|
|
|
mean value: 0.6194055944055944
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.63636364 0.54545455 0.6 0.6 0.8 0.6
|
|
0.45454545 0.72727273 0.81818182 0.45454545]
|
|
|
|
mean value: 0.6236363636363637
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.63636364 0.5 0.66363636 0.52727273 0.71818182 0.66363636
|
|
0.57727273 0.61363636 0.70909091 0.57727273]
|
|
|
|
mean value: 0.6186363636363637
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.46666667 0.35294118 0.46153846 0.375 0.57142857 0.46153846
|
|
0.35714286 0.5 0.6 0.35714286]
|
|
|
|
mean value: 0.4503399051928464
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: -0.01
|
|
|
|
Accuracy on Blind test: 0.45
|
|
|
|
Model_name: Ridge Classifier
|
|
Model func: RidgeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.03454781 0.03430343 0.03426123 0.03424621 0.03509831 0.03485656
|
|
0.0354929 0.03958178 0.04269385 0.03967881]
|
|
|
|
mean value: 0.03647608757019043
|
|
|
|
key: score_time
|
|
value: [0.03377104 0.02306747 0.02031469 0.02201653 0.02278471 0.02362514
|
|
0.0235343 0.02275467 0.02374935 0.02373099]
|
|
|
|
mean value: 0.02393488883972168
|
|
|
|
key: test_mcc
|
|
value: [0.83205029 0.63636364 0.80909091 0.71818182 0.90909091 0.80909091
|
|
0.90829511 0.63305416 0.71562645 0.42727273]
|
|
|
|
mean value: 0.7398116921937792
|
|
|
|
key: train_mcc
|
|
value: [0.94784115 0.95810708 0.95894679 0.9690588 0.95894679 0.94810203
|
|
0.95896444 0.95832877 0.95896444 0.97927405]
|
|
|
|
mean value: 0.9596534335646937
|
|
|
|
key: test_accuracy
|
|
value: [0.90909091 0.81818182 0.9047619 0.85714286 0.95238095 0.9047619
|
|
0.95238095 0.80952381 0.85714286 0.71428571]
|
|
|
|
mean value: 0.8679653679653679
|
|
|
|
key: train_accuracy
|
|
value: [0.97368421 0.97894737 0.97905759 0.98429319 0.97905759 0.97382199
|
|
0.97905759 0.97905759 0.97905759 0.9895288 ]
|
|
|
|
mean value: 0.9795563516120144
|
|
|
|
key: test_fscore
|
|
value: [0.91666667 0.81818182 0.9 0.85714286 0.95238095 0.9
|
|
0.95652174 0.8 0.86956522 0.72727273]
|
|
|
|
mean value: 0.8697731978166761
|
|
|
|
key: train_fscore
|
|
value: [0.97409326 0.97916667 0.97959184 0.98461538 0.97959184 0.97435897
|
|
0.97938144 0.97916667 0.97938144 0.98958333]
|
|
|
|
mean value: 0.9798930849957056
|
|
|
|
key: test_precision
|
|
value: [0.84615385 0.81818182 0.9 0.81818182 0.90909091 0.9
|
|
0.91666667 0.88888889 0.83333333 0.72727273]
|
|
|
|
mean value: 0.8557770007770008
|
|
|
|
key: train_precision
|
|
value: [0.95918367 0.96907216 0.96 0.96969697 0.96 0.95959596
|
|
0.95959596 0.96907216 0.95959596 0.97938144]
|
|
|
|
mean value: 0.9645194295150112
|
|
|
|
key: test_recall
|
|
value: [1. 0.81818182 0.9 0.9 1. 0.9
|
|
1. 0.72727273 0.90909091 0.72727273]
|
|
|
|
mean value: 0.8881818181818182
|
|
|
|
key: train_recall /home/tanu/git/LSHTM_analysis/scripts/ml/./katg_7030.py:176: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
rus_CT.sort_values(by = ['test_mcc'], ascending = False, inplace = True)
|
|
/home/tanu/git/LSHTM_analysis/scripts/ml/./katg_7030.py:179: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
rus_BT.sort_values(by = ['bts_mcc'], ascending = False, inplace = True)
|
|
|
|
value: [0.98947368 0.98947368 1. 1. 1. 0.98958333
|
|
1. 0.98947368 1. 1. ]
|
|
|
|
mean value: 0.9958004385964913
|
|
|
|
key: test_roc_auc
|
|
value: [0.90909091 0.81818182 0.90454545 0.85909091 0.95454545 0.90454545
|
|
0.95 0.81363636 0.85454545 0.71363636]
|
|
|
|
mean value: 0.8681818181818182
|
|
|
|
key: train_roc_auc
|
|
value: [0.97368421 0.97894737 0.97894737 0.98421053 0.97894737 0.97373904
|
|
0.97916667 0.97911184 0.97916667 0.98958333]
|
|
|
|
mean value: 0.9795504385964913
|
|
|
|
key: test_jcc
|
|
value: [0.84615385 0.69230769 0.81818182 0.75 0.90909091 0.81818182
|
|
0.91666667 0.66666667 0.76923077 0.57142857]
|
|
|
|
mean value: 0.7757908757908758
|
|
|
|
key: train_jcc
|
|
value: [0.94949495 0.95918367 0.96 0.96969697 0.96 0.95
|
|
0.95959596 0.95918367 0.95959596 0.97938144]
|
|
|
|
mean value: 0.9606132628621583
|
|
|
|
MCC on Blind test: 0.75
|
|
|
|
Accuracy on Blind test: 0.88
|
|
|
|
Model_name: Ridge ClassifierCV
|
|
Model func: RidgeClassifierCV(cv=10)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifierCV(cv=10))])
|
|
|
|
key: fit_time
|
|
value: [0.26996922 0.23063183 0.27355933 0.23035693 0.261343 0.21444583
|
|
0.22415519 0.27059078 0.30639648 0.24822783]
|
|
|
|
mean value: 0.25296764373779296
|
|
|
|
key: score_time
|
|
value: [0.02385116 0.02095103 0.02128196 0.02257371 0.02106357 0.02217174
|
|
0.02236676 0.02201104 0.02247214 0.02237701]
|
|
|
|
mean value: 0.022112011909484863
|
|
|
|
key: test_mcc
|
|
value: [0.83205029 0.63636364 0.80909091 0.71818182 0.90909091 0.80909091
|
|
0.90829511 0.63305416 0.71562645 0.42727273]
|
|
|
|
mean value: 0.7398116921937792
|
|
|
|
key: train_mcc
|
|
value: [0.94784115 0.95810708 0.95894679 0.9690588 0.95894679 0.94810203
|
|
0.95896444 0.95832877 0.95896444 0.97927405]
|
|
|
|
mean value: 0.9596534335646937
|
|
|
|
key: test_accuracy
|
|
value: [0.90909091 0.81818182 0.9047619 0.85714286 0.95238095 0.9047619
|
|
0.95238095 0.80952381 0.85714286 0.71428571]
|
|
|
|
mean value: 0.8679653679653679
|
|
|
|
key: train_accuracy
|
|
value: [0.97368421 0.97894737 0.97905759 0.98429319 0.97905759 0.97382199
|
|
0.97905759 0.97905759 0.97905759 0.9895288 ]
|
|
|
|
mean value: 0.9795563516120144
|
|
|
|
key: test_fscore
|
|
value: [0.91666667 0.81818182 0.9 0.85714286 0.95238095 0.9
|
|
0.95652174 0.8 0.86956522 0.72727273]
|
|
|
|
mean value: 0.8697731978166761
|
|
|
|
key: train_fscore
|
|
value: [0.97409326 0.97916667 0.97959184 0.98461538 0.97959184 0.97435897
|
|
0.97938144 0.97916667 0.97938144 0.98958333]
|
|
|
|
mean value: 0.9798930849957056
|
|
|
|
key: test_precision
|
|
value: [0.84615385 0.81818182 0.9 0.81818182 0.90909091 0.9
|
|
0.91666667 0.88888889 0.83333333 0.72727273]
|
|
|
|
mean value: 0.8557770007770008
|
|
|
|
key: train_precision
|
|
value: [0.95918367 0.96907216 0.96 0.96969697 0.96 0.95959596
|
|
0.95959596 0.96907216 0.95959596 0.97938144]
|
|
|
|
mean value: 0.9645194295150112
|
|
|
|
key: test_recall
|
|
value: [1. 0.81818182 0.9 0.9 1. 0.9
|
|
1. 0.72727273 0.90909091 0.72727273]
|
|
|
|
mean value: 0.8881818181818182
|
|
|
|
key: train_recall
|
|
value: [0.98947368 0.98947368 1. 1. 1. 0.98958333
|
|
1. 0.98947368 1. 1. ]
|
|
|
|
mean value: 0.9958004385964913
|
|
|
|
key: test_roc_auc
|
|
value: [0.90909091 0.81818182 0.90454545 0.85909091 0.95454545 0.90454545
|
|
0.95 0.81363636 0.85454545 0.71363636]
|
|
|
|
mean value: 0.8681818181818182
|
|
|
|
key: train_roc_auc
|
|
value: [0.97368421 0.97894737 0.97894737 0.98421053 0.97894737 0.97373904
|
|
0.97916667 0.97911184 0.97916667 0.98958333]
|
|
|
|
mean value: 0.9795504385964913
|
|
|
|
key: test_jcc
|
|
value: [0.84615385 0.69230769 0.81818182 0.75 0.90909091 0.81818182
|
|
0.91666667 0.66666667 0.76923077 0.57142857]
|
|
|
|
mean value: 0.7757908757908758
|
|
|
|
key: train_jcc
|
|
value: [0.94949495 0.95918367 0.96 0.96969697 0.96 0.95
|
|
0.95959596 0.95918367 0.95959596 0.97938144]
|
|
|
|
mean value: 0.9606132628621583
|
|
|
|
MCC on Blind test: 0.75
|
|
|
|
Accuracy on Blind test: 0.88
|
|
|
|
Model_name: Logistic Regression
|
|
Model func: LogisticRegression(random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegression(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.0495255 0.04171205 0.05164695 0.03768301 0.03714681 0.04049611
|
|
0.05802965 0.06608534 0.03809094 0.03966403]
|
|
|
|
mean value: 0.04600803852081299
|
|
|
|
key: score_time
|
|
value: [0.01200128 0.01423216 0.0186379 0.01211715 0.01423645 0.01453924
|
|
0.01463461 0.01525044 0.01495218 0.01509023]
|
|
|
|
mean value: 0.01456916332244873
|
|
|
|
key: test_mcc
|
|
value: [0.81322028 0.82462113 0.61152662 0.90692382 0.80817439 0.90692382
|
|
0.85441771 0.7197263 0.7098505 0.86240942]
|
|
|
|
mean value: 0.8017793998138952
|
|
|
|
key: train_mcc
|
|
value: [0.91372712 0.91893234 0.91395353 0.90846996 0.90846996 0.91379661
|
|
0.89790701 0.91380162 0.913746 0.93536575]
|
|
|
|
mean value: 0.9138169912072902
|
|
|
|
key: test_accuracy
|
|
value: [0.9047619 0.9047619 0.80487805 0.95121951 0.90243902 0.95121951
|
|
0.92682927 0.85365854 0.85365854 0.92682927]
|
|
|
|
mean value: 0.8980255516840883
|
|
|
|
key: train_accuracy
|
|
value: [0.95675676 0.95945946 0.95687332 0.9541779 0.9541779 0.95687332
|
|
0.94878706 0.95687332 0.95687332 0.96765499]
|
|
|
|
mean value: 0.956850732133751
|
|
|
|
key: test_fscore
|
|
value: [0.9 0.89473684 0.78947368 0.95238095 0.89473684 0.95238095
|
|
0.93023256 0.86956522 0.86363636 0.93333333]
|
|
|
|
mean value: 0.8980476745683493
|
|
|
|
key: train_fscore
|
|
value: [0.95721925 0.95934959 0.95744681 0.95466667 0.95466667 0.95721925
|
|
0.94933333 0.95698925 0.95675676 0.96774194]
|
|
|
|
mean value: 0.9571389510899493
|
|
|
|
key: test_precision
|
|
value: [0.94736842 1. 0.83333333 0.90909091 0.94444444 0.90909091
|
|
0.90909091 0.8 0.82608696 0.875 ]
|
|
|
|
mean value: 0.8953505882624876
|
|
|
|
key: train_precision
|
|
value: [0.94708995 0.96195652 0.94736842 0.94708995 0.94708995 0.95212766
|
|
0.93684211 0.95187166 0.95675676 0.96256684]
|
|
|
|
mean value: 0.9510759808329783
|
|
|
|
key: test_recall
|
|
value: [0.85714286 0.80952381 0.75 1. 0.85 1.
|
|
0.95238095 0.95238095 0.9047619 1. ]
|
|
|
|
mean value: 0.9076190476190475
|
|
|
|
key: train_recall
|
|
value: [0.96756757 0.95675676 0.96774194 0.96236559 0.96236559 0.96236559
|
|
0.96216216 0.96216216 0.95675676 0.97297297]
|
|
|
|
mean value: 0.9633217088055798
|
|
|
|
key: test_roc_auc
|
|
value: [0.9047619 0.9047619 0.80357143 0.95238095 0.90119048 0.95238095
|
|
0.92619048 0.85119048 0.85238095 0.925 ]
|
|
|
|
mean value: 0.8973809523809524
|
|
|
|
key: train_roc_auc
|
|
value: [0.95675676 0.95945946 0.95684394 0.95415577 0.95415577 0.95685847
|
|
0.94882302 0.95688753 0.956873 0.96766928]
|
|
|
|
mean value: 0.9568482999128161
|
|
|
|
key: test_jcc
|
|
value: [0.81818182 0.80952381 0.65217391 0.90909091 0.80952381 0.90909091
|
|
0.86956522 0.76923077 0.76 0.875 ]
|
|
|
|
mean value: 0.8181381155076808
|
|
|
|
key: train_jcc
|
|
value: [0.91794872 0.921875 0.91836735 0.91326531 0.91326531 0.91794872
|
|
0.9035533 0.91752577 0.91709845 0.9375 ]
|
|
|
|
mean value: 0.9178347913365226
|
|
|
|
MCC on Blind test: 0.78
|
|
|
|
Accuracy on Blind test: 0.9
|
|
|
|
Model_name: Logistic RegressionCV
|
|
Model func: LogisticRegressionCV(random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegressionCV(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.09578729 1.01711416 1.37589264 0.92109752 0.99411583 0.98509765
|
|
1.15172482 1.26849198 1.10450602 0.88261199]
|
|
|
|
mean value: 1.0796439886093139
|
|
|
|
key: score_time
|
|
value: [0.01467395 0.02109146 0.0124011 0.01224828 0.01224065 0.01213932
|
|
0.0122056 0.0121479 0.01539993 0.01800776]
|
|
|
|
mean value: 0.014255595207214356
|
|
|
|
key: test_mcc
|
|
value: [0.95346259 0.90889326 0.90649828 0.95227002 0.95227002 0.95227002
|
|
0.95238095 1. 0.80817439 0.90649828]
|
|
|
|
mean value: 0.9292717796884236
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.97619048 0.95238095 0.95121951 0.97560976 0.97560976 0.97560976
|
|
0.97560976 1. 0.90243902 0.95121951]
|
|
|
|
mean value: 0.963588850174216
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.97560976 0.95 0.94736842 0.97435897 0.97435897 0.97435897
|
|
0.97560976 1. 0.90909091 0.95454545]
|
|
|
|
mean value: 0.963530121996104
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 1. 1. 1. 1. 1.
|
|
1. 1. 0.86956522 0.91304348]
|
|
|
|
mean value: 0.9782608695652174
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.95238095 0.9047619 0.9 0.95 0.95 0.95
|
|
0.95238095 1. 0.95238095 1. ]
|
|
|
|
mean value: 0.9511904761904761
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.97619048 0.95238095 0.95 0.975 0.975 0.975
|
|
0.97619048 1. 0.90119048 0.95 ]
|
|
|
|
mean value: 0.9630952380952381
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.95238095 0.9047619 0.9 0.95 0.95 0.95
|
|
0.95238095 1. 0.83333333 0.91304348]
|
|
|
|
mean value: 0.9305900621118012
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.81
|
|
|
|
Accuracy on Blind test: 0.92
|
|
|
|
Model_name: Gaussian NB
|
|
Model func: GaussianNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01408195 0.01117468 0.00955725 0.00979543 0.00963759 0.00944567
|
|
0.00941515 0.00972009 0.00966501 0.00958419]
|
|
|
|
mean value: 0.010207700729370116
|
|
|
|
key: score_time
|
|
value: [0.01482582 0.00992823 0.0092001 0.00906563 0.00890923 0.00876284
|
|
0.00872135 0.00879741 0.00879622 0.00883293]
|
|
|
|
mean value: 0.00958397388458252
|
|
|
|
key: test_mcc
|
|
value: [0.38490018 0.62187434 0.65871309 0.6806903 0.66668392 0.42516543
|
|
0.36718832 0.42916625 0.61969655 0.46623254]
|
|
|
|
mean value: 0.5320310918576006
|
|
|
|
key: train_mcc
|
|
value: [0.60203744 0.59709223 0.6477664 0.52948819 0.59363692 0.55833251
|
|
0.55570017 0.59043621 0.6092626 0.58611464]
|
|
|
|
mean value: 0.5869867305151683
|
|
|
|
key: test_accuracy
|
|
value: [0.69047619 0.80952381 0.82926829 0.82926829 0.82926829 0.70731707
|
|
0.68292683 0.70731707 0.80487805 0.73170732]
|
|
|
|
mean value: 0.7621951219512195
|
|
|
|
key: train_accuracy
|
|
value: [0.7972973 0.79459459 0.81940701 0.76010782 0.79514825 0.77628032
|
|
0.77358491 0.79245283 0.80053908 0.78706199]
|
|
|
|
mean value: 0.789647410213448
|
|
|
|
key: test_fscore
|
|
value: [0.71111111 0.81818182 0.82051282 0.84444444 0.8372093 0.72727273
|
|
0.71111111 0.75 0.82608696 0.75555556]
|
|
|
|
mean value: 0.7801485847036909
|
|
|
|
key: train_fscore
|
|
value: [0.81203008 0.81 0.8337469 0.78132678 0.80612245 0.79197995
|
|
0.79104478 0.80506329 0.815 0.80589681]
|
|
|
|
mean value: 0.8052211026787506
|
|
|
|
key: test_precision
|
|
value: [0.66666667 0.7826087 0.84210526 0.76 0.7826087 0.66666667
|
|
0.66666667 0.66666667 0.76 0.70833333]
|
|
|
|
mean value: 0.7302322654462242
|
|
|
|
key: train_precision
|
|
value: [0.75700935 0.75348837 0.77419355 0.71945701 0.76699029 0.74178404
|
|
0.73271889 0.75714286 0.75813953 0.73873874]
|
|
|
|
mean value: 0.7499662633444528
|
|
|
|
key: test_recall
|
|
value: [0.76190476 0.85714286 0.8 0.95 0.9 0.8
|
|
0.76190476 0.85714286 0.9047619 0.80952381]
|
|
|
|
mean value: 0.8402380952380952
|
|
|
|
key: train_recall
|
|
value: [0.87567568 0.87567568 0.90322581 0.85483871 0.84946237 0.84946237
|
|
0.85945946 0.85945946 0.88108108 0.88648649]
|
|
|
|
mean value: 0.8694827085149666
|
|
|
|
key: test_roc_auc
|
|
value: [0.69047619 0.80952381 0.82857143 0.83214286 0.83095238 0.70952381
|
|
0.68095238 0.70357143 0.80238095 0.7297619 ]
|
|
|
|
mean value: 0.7617857142857143
|
|
|
|
key: train_roc_auc
|
|
value: [0.7972973 0.79459459 0.81918047 0.75985179 0.79500145 0.77608253
|
|
0.77381575 0.79263296 0.80075559 0.78732926]
|
|
|
|
mean value: 0.7896541702993316
|
|
|
|
key: test_jcc
|
|
value: [0.55172414 0.69230769 0.69565217 0.73076923 0.72 0.57142857
|
|
0.55172414 0.6 0.7037037 0.60714286]
|
|
|
|
mean value: 0.6424452505127167
|
|
|
|
key: train_jcc
|
|
value: [0.6835443 0.68067227 0.71489362 0.64112903 0.67521368 0.65560166
|
|
0.65432099 0.67372881 0.68776371 0.67489712]
|
|
|
|
mean value: 0.6741765190584461
|
|
|
|
MCC on Blind test: 0.47
|
|
|
|
Accuracy on Blind test: 0.76
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.00980854 0.00986242 0.00992036 0.00991201 0.00991249 0.00991249
|
|
0.00982308 0.00993419 0.00997567 0.00992012]
|
|
|
|
mean value: 0.009898138046264649
|
|
|
|
key: score_time
|
|
value: [0.00885415 0.00884795 0.00889301 0.00889349 0.00893068 0.00886345
|
|
0.00891495 0.00888014 0.00895143 0.00895309]
|
|
|
|
mean value: 0.008898234367370606
|
|
|
|
key: test_mcc
|
|
value: [0.57207755 0.52620136 0.36718832 0.72229808 0.7098505 0.6806903
|
|
0.65871309 0.41428571 0.51320273 0.51190476]
|
|
|
|
mean value: 0.5676412422906144
|
|
|
|
key: train_mcc
|
|
value: [0.63807092 0.62969126 0.6558879 0.6516517 0.68216317 0.64716482
|
|
0.68245673 0.68195292 0.68220933 0.66900863]
|
|
|
|
mean value: 0.662025738279246
|
|
|
|
key: test_accuracy
|
|
value: [0.78571429 0.76190476 0.68292683 0.85365854 0.85365854 0.82926829
|
|
0.82926829 0.70731707 0.75609756 0.75609756]
|
|
|
|
mean value: 0.7815911730545877
|
|
|
|
key: train_accuracy
|
|
value: [0.81891892 0.81351351 0.82749326 0.82479784 0.84097035 0.82210243
|
|
0.84097035 0.84097035 0.84097035 0.8328841 ]
|
|
|
|
mean value: 0.8303591462082028
|
|
|
|
key: test_fscore
|
|
value: [0.79069767 0.77272727 0.64864865 0.86363636 0.84210526 0.84444444
|
|
0.8372093 0.71428571 0.77272727 0.76190476]
|
|
|
|
mean value: 0.7848386718276559
|
|
|
|
key: train_fscore
|
|
value: [0.82133333 0.82170543 0.83246073 0.83204134 0.84350133 0.83076923
|
|
0.84350133 0.84097035 0.84266667 0.84020619]
|
|
|
|
mean value: 0.8349155922270581
|
|
|
|
key: test_precision
|
|
value: [0.77272727 0.73913043 0.70588235 0.79166667 0.88888889 0.76
|
|
0.81818182 0.71428571 0.73913043 0.76190476]
|
|
|
|
mean value: 0.7691798345161517
|
|
|
|
key: train_precision
|
|
value: [0.81052632 0.78712871 0.81122449 0.80099502 0.83246073 0.79411765
|
|
0.828125 0.83870968 0.83157895 0.80295567]
|
|
|
|
mean value: 0.8137822213187824
|
|
|
|
key: test_recall
|
|
value: [0.80952381 0.80952381 0.6 0.95 0.8 0.95
|
|
0.85714286 0.71428571 0.80952381 0.76190476]
|
|
|
|
mean value: 0.8061904761904761
|
|
|
|
key: train_recall
|
|
value: [0.83243243 0.85945946 0.85483871 0.8655914 0.85483871 0.87096774
|
|
0.85945946 0.84324324 0.85405405 0.88108108]
|
|
|
|
mean value: 0.8575966288869514
|
|
|
|
key: test_roc_auc
|
|
value: [0.78571429 0.76190476 0.68095238 0.85595238 0.85238095 0.83214286
|
|
0.82857143 0.70714286 0.7547619 0.75595238]
|
|
|
|
mean value: 0.781547619047619
|
|
|
|
key: train_roc_auc
|
|
value: [0.81891892 0.81351351 0.82741935 0.82468759 0.84093287 0.82197036
|
|
0.84102005 0.84097646 0.84100552 0.83301366]
|
|
|
|
mean value: 0.8303458297006684
|
|
|
|
key: test_jcc
|
|
value: [0.65384615 0.62962963 0.48 0.76 0.72727273 0.73076923
|
|
0.72 0.55555556 0.62962963 0.61538462]
|
|
|
|
mean value: 0.6502087542087542
|
|
|
|
key: train_jcc
|
|
value: [0.69683258 0.69736842 0.71300448 0.71238938 0.7293578 0.71052632
|
|
0.7293578 0.7255814 0.7281106 0.72444444]
|
|
|
|
mean value: 0.716697321606543
|
|
|
|
MCC on Blind test: 0.6
|
|
|
|
Accuracy on Blind test: 0.81
|
|
|
|
Model_name: K-Nearest Neighbors
|
|
Model func: KNeighborsClassifier()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', KNeighborsClassifier())])
|
|
|
|
key: fit_time
|
|
value: [0.00937152 0.00930285 0.00922108 0.00923443 0.00945258 0.00900841
|
|
0.00912046 0.01025939 0.01043296 0.01040459]
|
|
|
|
mean value: 0.00958082675933838
|
|
|
|
key: score_time
|
|
value: [0.01154137 0.01131701 0.01124048 0.01131988 0.01132011 0.01105022
|
|
0.01111436 0.01203346 0.01216817 0.01221538]
|
|
|
|
mean value: 0.011532044410705567
|
|
|
|
key: test_mcc
|
|
value: [0.248452 0.57735027 0.01756821 0.65871309 0.61152662 0.47439956
|
|
0.61152662 0.26730386 0.38060103 0.6133669 ]
|
|
|
|
mean value: 0.446080816017905
|
|
|
|
key: train_mcc
|
|
value: [0.62220365 0.67248442 0.64889279 0.63017348 0.61050859 0.64734861
|
|
0.68042218 0.6676026 0.61385458 0.6558879 ]
|
|
|
|
mean value: 0.6449378798016933
|
|
|
|
key: test_accuracy
|
|
value: [0.61904762 0.78571429 0.51219512 0.82926829 0.80487805 0.73170732
|
|
0.80487805 0.63414634 0.68292683 0.80487805]
|
|
|
|
mean value: 0.7209639953542393
|
|
|
|
key: train_accuracy
|
|
value: [0.81081081 0.83513514 0.82210243 0.81401617 0.8032345 0.82210243
|
|
0.83827493 0.8328841 0.8032345 0.82749326]
|
|
|
|
mean value: 0.8209288264005246
|
|
|
|
key: test_fscore
|
|
value: [0.55555556 0.76923077 0.41176471 0.82051282 0.78947368 0.68571429
|
|
0.81818182 0.65116279 0.64864865 0.8 ]
|
|
|
|
mean value: 0.6950245078634452
|
|
|
|
key: train_fscore
|
|
value: [0.80662983 0.82816901 0.81142857 0.80672269 0.79202279 0.81355932
|
|
0.82857143 0.8258427 0.78592375 0.82222222]
|
|
|
|
mean value: 0.8121092323988096
|
|
|
|
key: test_precision
|
|
value: [0.66666667 0.83333333 0.5 0.84210526 0.83333333 0.8
|
|
0.7826087 0.63636364 0.75 0.84210526]
|
|
|
|
mean value: 0.7486516191664934
|
|
|
|
key: train_precision
|
|
value: [0.82485876 0.86470588 0.86585366 0.84210526 0.84242424 0.85714286
|
|
0.87878788 0.85964912 0.85897436 0.84571429]
|
|
|
|
mean value: 0.8540216306960209
|
|
|
|
key: test_recall
|
|
value: [0.47619048 0.71428571 0.35 0.8 0.75 0.6
|
|
0.85714286 0.66666667 0.57142857 0.76190476]
|
|
|
|
mean value: 0.6547619047619048
|
|
|
|
key: train_recall
|
|
value: [0.78918919 0.79459459 0.76344086 0.77419355 0.74731183 0.77419355
|
|
0.78378378 0.79459459 0.72432432 0.8 ]
|
|
|
|
mean value: 0.7745626271432723
|
|
|
|
key: test_roc_auc
|
|
value: [0.61904762 0.78571429 0.50833333 0.82857143 0.80357143 0.72857143
|
|
0.80357143 0.63333333 0.68571429 0.80595238]
|
|
|
|
mean value: 0.7202380952380952
|
|
|
|
key: train_roc_auc
|
|
value: [0.81081081 0.83513514 0.82226097 0.8141238 0.80338564 0.82223191
|
|
0.83812845 0.83278117 0.80302238 0.82741935]
|
|
|
|
mean value: 0.8209299622202848
|
|
|
|
key: test_jcc
|
|
value: [0.38461538 0.625 0.25925926 0.69565217 0.65217391 0.52173913
|
|
0.69230769 0.48275862 0.48 0.66666667]
|
|
|
|
mean value: 0.5460172840929962
|
|
|
|
key: train_jcc
|
|
value: [0.67592593 0.70673077 0.68269231 0.67605634 0.65566038 0.68571429
|
|
0.70731707 0.70334928 0.647343 0.69811321]
|
|
|
|
mean value: 0.6838902562133582
|
|
|
|
MCC on Blind test: 0.33
|
|
|
|
Accuracy on Blind test: 0.66
|
|
|
|
Model_name: SVM
|
|
Model func: SVC(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SVC(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01808858 0.01731157 0.01881266 0.01991796 0.01996613 0.01853418
|
|
0.01932144 0.01959515 0.01970315 0.01968789]
|
|
|
|
mean value: 0.019093871116638184
|
|
|
|
key: score_time
|
|
value: [0.01118565 0.01099086 0.01180649 0.01189542 0.01190186 0.01185799
|
|
0.01187539 0.01184416 0.01220989 0.01184678]
|
|
|
|
mean value: 0.011741447448730468
|
|
|
|
key: test_mcc
|
|
value: [0.80952381 0.58834841 0.51190476 0.78072006 0.70714286 0.72229808
|
|
0.67700771 0.7197263 0.65871309 0.73786479]
|
|
|
|
mean value: 0.6913249867860977
|
|
|
|
key: train_mcc
|
|
value: [0.84533292 0.7964953 0.83490488 0.79583079 0.81394491 0.80073631
|
|
0.82483989 0.80086091 0.82989307 0.7959641 ]
|
|
|
|
mean value: 0.8138803081683508
|
|
|
|
key: test_accuracy
|
|
value: [0.9047619 0.78571429 0.75609756 0.87804878 0.85365854 0.85365854
|
|
0.82926829 0.85365854 0.82926829 0.85365854]
|
|
|
|
mean value: 0.8397793263646922
|
|
|
|
key: train_accuracy
|
|
value: [0.92162162 0.89459459 0.91644205 0.89487871 0.90566038 0.89757412
|
|
0.91105121 0.89757412 0.91374663 0.89487871]
|
|
|
|
mean value: 0.9048022146135354
|
|
|
|
key: test_fscore
|
|
value: [0.9047619 0.80851064 0.75 0.88888889 0.85 0.86363636
|
|
0.85106383 0.86956522 0.8372093 0.875 ]
|
|
|
|
mean value: 0.8498636145089149
|
|
|
|
key: train_fscore
|
|
value: [0.92428198 0.90126582 0.91948052 0.90126582 0.90956072 0.9035533
|
|
0.91428571 0.90306122 0.91666667 0.90076336]
|
|
|
|
mean value: 0.9094185136611744
|
|
|
|
key: test_precision
|
|
value: [0.9047619 0.73076923 0.75 0.8 0.85 0.79166667
|
|
0.76923077 0.8 0.81818182 0.77777778]
|
|
|
|
mean value: 0.7992388167388167
|
|
|
|
key: train_precision
|
|
value: [0.89393939 0.84761905 0.88944724 0.85167464 0.87562189 0.85576923
|
|
0.88 0.85507246 0.88442211 0.85096154]
|
|
|
|
mean value: 0.8684527552986584
|
|
|
|
key: test_recall
|
|
value: [0.9047619 0.9047619 0.75 1. 0.85 0.95
|
|
0.95238095 0.95238095 0.85714286 1. ]
|
|
|
|
mean value: 0.9121428571428571
|
|
|
|
key: train_recall
|
|
value: [0.95675676 0.96216216 0.9516129 0.95698925 0.94623656 0.95698925
|
|
0.95135135 0.95675676 0.95135135 0.95675676]
|
|
|
|
mean value: 0.9546963092124383
|
|
|
|
key: test_roc_auc
|
|
value: [0.9047619 0.78571429 0.75595238 0.88095238 0.85357143 0.85595238
|
|
0.82619048 0.85119048 0.82857143 0.85 ]
|
|
|
|
mean value: 0.8392857142857143
|
|
|
|
key: train_roc_auc
|
|
value: [0.92162162 0.89459459 0.91634699 0.89471084 0.90555071 0.89741354
|
|
0.91115955 0.89773322 0.91384772 0.89504505]
|
|
|
|
mean value: 0.9048023830281895
|
|
|
|
key: test_jcc
|
|
value: [0.82608696 0.67857143 0.6 0.8 0.73913043 0.76
|
|
0.74074074 0.76923077 0.72 0.77777778]
|
|
|
|
mean value: 0.7411538107625064
|
|
|
|
key: train_jcc
|
|
value: [0.8592233 0.8202765 0.85096154 0.8202765 0.83412322 0.82407407
|
|
0.84210526 0.82325581 0.84615385 0.81944444]
|
|
|
|
mean value: 0.833989449935668
|
|
|
|
MCC on Blind test: 0.72
|
|
|
|
Accuracy on Blind test: 0.88
|
|
|
|
Model_name: MLP
|
|
Model func: MLPClassifier(max_iter=500, random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MLPClassifier(max_iter=500, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.48036051 1.6958499 1.75260854 1.34077454 1.51118517 1.51683521
|
|
1.85760546 2.30665851 1.85421658 1.85729861]
|
|
|
|
mean value: 1.7173393011093139
|
|
|
|
key: score_time
|
|
value: [0.01311946 0.01507545 0.01247096 0.01482487 0.01506543 0.03141546
|
|
0.01907992 0.01261067 0.01477861 0.01477838]
|
|
|
|
mean value: 0.01632192134857178
|
|
|
|
key: test_mcc
|
|
value: [0.82462113 0.81322028 0.8047619 0.95238095 0.81975606 0.95227002
|
|
0.8047619 1. 0.80817439 0.95227002]
|
|
|
|
mean value: 0.8732216655064745
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.9047619 0.9047619 0.90243902 0.97560976 0.90243902 0.97560976
|
|
0.90243902 1. 0.90243902 0.97560976]
|
|
|
|
mean value: 0.9346109175377468
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.89473684 0.9 0.9 0.97560976 0.88888889 0.97435897
|
|
0.9047619 1. 0.90909091 0.97674419]
|
|
|
|
mean value: 0.9324191461350013
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 0.94736842 0.9 0.95238095 1. 1.
|
|
0.9047619 1. 0.86956522 0.95454545]
|
|
|
|
mean value: 0.9528621950132248
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.80952381 0.85714286 0.9 1. 0.8 0.95
|
|
0.9047619 1. 0.95238095 1. ]
|
|
|
|
mean value: 0.9173809523809524
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.9047619 0.9047619 0.90238095 0.97619048 0.9 0.975
|
|
0.90238095 1. 0.90119048 0.975 ]
|
|
|
|
mean value: 0.9341666666666666
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.80952381 0.81818182 0.81818182 0.95238095 0.8 0.95
|
|
0.82608696 1. 0.83333333 0.95454545]
|
|
|
|
mean value: 0.8762234142668925
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.67
|
|
|
|
Accuracy on Blind test: 0.86
|
|
|
|
Model_name: Decision Tree
|
|
Model func: DecisionTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', DecisionTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02465558 0.016572 0.01520538 0.02612782 0.01691365 0.02268863
|
|
0.02848172 0.02351189 0.01709127 0.02407074]
|
|
|
|
mean value: 0.02153186798095703
|
|
|
|
key: score_time
|
|
value: [0.01841354 0.01036406 0.01123571 0.00964928 0.00933528 0.00891948
|
|
0.01145577 0.00994396 0.00893211 0.01515102]
|
|
|
|
mean value: 0.011340022087097168
|
|
|
|
key: test_mcc
|
|
value: [0.8660254 0.95346259 0.90649828 1. 0.85441771 0.95227002
|
|
0.95238095 0.95238095 0.85441771 0.8547619 ]
|
|
|
|
mean value: 0.9146615512789581
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.92857143 0.97619048 0.95121951 1. 0.92682927 0.97560976
|
|
0.97560976 0.97560976 0.92682927 0.92682927]
|
|
|
|
mean value: 0.9563298490127758
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.92307692 0.97560976 0.94736842 1. 0.92307692 0.97435897
|
|
0.97560976 0.97560976 0.93023256 0.92682927]
|
|
|
|
mean value: 0.9551772336290353
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 1. 1. 1. 0.94736842 1.
|
|
1. 1. 0.90909091 0.95 ]
|
|
|
|
mean value: 0.9806459330143541
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.85714286 0.95238095 0.9 1. 0.9 0.95
|
|
0.95238095 0.95238095 0.95238095 0.9047619 ]
|
|
|
|
mean value: 0.9321428571428572
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.92857143 0.97619048 0.95 1. 0.92619048 0.975
|
|
0.97619048 0.97619048 0.92619048 0.92738095]
|
|
|
|
mean value: 0.9561904761904761
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.85714286 0.95238095 0.9 1. 0.85714286 0.95
|
|
0.95238095 0.95238095 0.86956522 0.86363636]
|
|
|
|
mean value: 0.9154630152456239
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.82
|
|
|
|
Accuracy on Blind test: 0.92
|
|
|
|
Model_name: Extra Trees
|
|
Model func: ExtraTreesClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreesClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.1296804 0.12818766 0.12894821 0.13796115 0.11931705 0.12167764
|
|
0.11896539 0.12175727 0.12500453 0.13481355]
|
|
|
|
mean value: 0.1266312837600708
|
|
|
|
key: score_time
|
|
value: [0.0192759 0.01999116 0.01901579 0.01934671 0.01817155 0.02717113
|
|
0.03437304 0.02251887 0.02317071 0.01954842]
|
|
|
|
mean value: 0.02225832939147949
|
|
|
|
key: test_mcc
|
|
value: [0.95346259 0.85811633 0.8047619 0.95238095 0.81975606 1.
|
|
0.80817439 0.86333169 0.7633652 0.85441771]
|
|
|
|
mean value: 0.867776682928389
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.97619048 0.92857143 0.90243902 0.97560976 0.90243902 1.
|
|
0.90243902 0.92682927 0.87804878 0.92682927]
|
|
|
|
mean value: 0.9319396051103368
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.97560976 0.93023256 0.9 0.97560976 0.88888889 1.
|
|
0.90909091 0.92307692 0.88888889 0.93023256]
|
|
|
|
mean value: 0.9321630238419801
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 0.90909091 0.9 0.95238095 1. 1.
|
|
0.86956522 1. 0.83333333 0.90909091]
|
|
|
|
mean value: 0.9373461321287408
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.95238095 0.95238095 0.9 1. 0.8 1.
|
|
0.95238095 0.85714286 0.95238095 0.95238095]
|
|
|
|
mean value: 0.9319047619047619
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.97619048 0.92857143 0.90238095 0.97619048 0.9 1.
|
|
0.90119048 0.92857143 0.87619048 0.92619048]
|
|
|
|
mean value: 0.9315476190476191
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.95238095 0.86956522 0.81818182 0.95238095 0.8 1.
|
|
0.83333333 0.85714286 0.8 0.86956522]
|
|
|
|
mean value: 0.8752550348202522
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.77
|
|
|
|
Accuracy on Blind test: 0.9
|
|
|
|
Model_name: Extra Tree
|
|
Model func: ExtraTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01090407 0.01682448 0.01031947 0.00970149 0.0096662 0.01756215
|
|
0.00981498 0.00959396 0.01117182 0.01251793]
|
|
|
|
mean value: 0.011807656288146973
|
|
|
|
key: score_time
|
|
value: [0.01086402 0.01643109 0.00869727 0.0086472 0.00876594 0.01397753
|
|
0.00873542 0.00865102 0.00945759 0.01478791]
|
|
|
|
mean value: 0.010901498794555663
|
|
|
|
key: test_mcc
|
|
value: [0.71754731 0.64597519 0.46623254 0.77831178 0.81975606 0.85441771
|
|
0.65952381 0.72229808 0.8047619 0.6133669 ]
|
|
|
|
mean value: 0.7082191294366296
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.85714286 0.80952381 0.73170732 0.87804878 0.90243902 0.92682927
|
|
0.82926829 0.85365854 0.90243902 0.80487805]
|
|
|
|
mean value: 0.8495934959349594
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.85 0.77777778 0.7027027 0.85714286 0.88888889 0.92307692
|
|
0.82926829 0.84210526 0.9047619 0.8 ]
|
|
|
|
mean value: 0.8375724610191876
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.89473684 0.93333333 0.76470588 1. 1. 0.94736842
|
|
0.85 0.94117647 0.9047619 0.84210526]
|
|
|
|
mean value: 0.9078188117352204
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.80952381 0.66666667 0.65 0.75 0.8 0.9
|
|
0.80952381 0.76190476 0.9047619 0.76190476]
|
|
|
|
mean value: 0.7814285714285715
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.85714286 0.80952381 0.7297619 0.875 0.9 0.92619048
|
|
0.8297619 0.85595238 0.90238095 0.80595238]
|
|
|
|
mean value: 0.8491666666666666
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.73913043 0.63636364 0.54166667 0.75 0.8 0.85714286
|
|
0.70833333 0.72727273 0.82608696 0.66666667]
|
|
|
|
mean value: 0.7252663278750235
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.42
|
|
|
|
Accuracy on Blind test: 0.75
|
|
|
|
Model_name: Random Forest
|
|
Model func: RandomForestClassifier(n_estimators=1000, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(n_estimators=1000, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.71200705 1.73543882 1.69342971 1.76952863 1.73066044 1.48200154
|
|
1.47888637 1.46924496 1.47123814 1.48011613]
|
|
|
|
mean value: 1.6022551774978637
|
|
|
|
key: score_time
|
|
value: [0.11766553 0.1164763 0.10360074 0.10462189 0.09121513 0.09602308
|
|
0.14463115 0.09650397 0.09187126 0.09328437]
|
|
|
|
mean value: 0.10558934211730957
|
|
|
|
key: test_mcc
|
|
value: [0.95346259 1. 1. 0.95238095 1. 1.
|
|
0.85441771 1. 0.86240942 0.95227002]
|
|
|
|
mean value: 0.9574940680202537
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.97619048 1. 1. 0.97560976 1. 1.
|
|
0.92682927 1. 0.92682927 0.97560976]
|
|
|
|
mean value: 0.9781068524970964
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.97560976 1. 1. 0.97560976 1. 1.
|
|
0.93023256 1. 0.93333333 0.97674419]
|
|
|
|
mean value: 0.9791529589714502
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 1. 1. 0.95238095 1. 1.
|
|
0.90909091 1. 0.875 0.95454545]
|
|
|
|
mean value: 0.9691017316017316
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.95238095 1. 1. 1. 1. 1.
|
|
0.95238095 1. 1. 1. ]
|
|
|
|
mean value: 0.9904761904761905
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.97619048 1. 1. 0.97619048 1. 1.
|
|
0.92619048 1. 0.925 0.975 ]
|
|
|
|
mean value: 0.9778571428571429
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.95238095 1. 1. 0.95238095 1. 1.
|
|
0.86956522 1. 0.875 0.95454545]
|
|
|
|
mean value: 0.9603872576698663
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.91
|
|
|
|
Accuracy on Blind test: 0.96
|
|
|
|
Model_name: Random Forest2
|
|
Model func: RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000...05', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10,
|
|
oob_score=True, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.88163686 0.94899797 0.92457771 0.92120004 0.88247681 1.04019213
|
|
0.91119266 0.97453594 0.94692683 0.96164441]
|
|
|
|
mean value: 0.9393381357192994
|
|
|
|
key: score_time
|
|
value: [0.24661112 0.24717331 0.24636698 0.15798783 0.18983316 0.25734568
|
|
0.26642156 0.22456074 0.2825954 0.12304974]
|
|
|
|
mean value: 0.22419455051422119
|
|
|
|
key: test_mcc
|
|
value: [0.95346259 0.95346259 0.90238095 0.95238095 1. 1.
|
|
0.85441771 0.90649828 0.90649828 0.90649828]
|
|
|
|
mean value: 0.9335599629113059
|
|
|
|
key: train_mcc
|
|
value: [0.97310093 0.97332853 0.96261094 0.96787795 0.98395537 0.97339739
|
|
0.97866529 0.9734012 0.98395676 0.97317407]
|
|
|
|
mean value: 0.9743468410052059
|
|
|
|
key: test_accuracy
|
|
value: [0.97619048 0.97619048 0.95121951 0.97560976 1. 1.
|
|
0.92682927 0.95121951 0.95121951 0.95121951]
|
|
|
|
mean value: 0.9659698025551684
|
|
|
|
key: train_accuracy
|
|
value: [0.98648649 0.98648649 0.98113208 0.98382749 0.99191375 0.98652291
|
|
0.98921833 0.98652291 0.99191375 0.98652291]
|
|
|
|
mean value: 0.9870547096962191
|
|
|
|
key: test_fscore
|
|
value: [0.97560976 0.97560976 0.95 0.97560976 1. 1.
|
|
0.93023256 0.95454545 0.95454545 0.95454545]
|
|
|
|
mean value: 0.9670698190068581
|
|
|
|
key: train_fscore
|
|
value: [0.98659517 0.98666667 0.98143236 0.98404255 0.992 0.9867374
|
|
0.98930481 0.98666667 0.9919571 0.98659517]
|
|
|
|
mean value: 0.9871997913715367
|
|
|
|
key: test_precision
|
|
value: [1. 1. 0.95 0.95238095 1. 1.
|
|
0.90909091 0.91304348 0.91304348 0.91304348]
|
|
|
|
mean value: 0.955060229625447
|
|
|
|
key: train_precision
|
|
value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
[0.9787234 0.97368421 0.96858639 0.97368421 0.98412698 0.97382199
|
|
0.97883598 0.97368421 0.98404255 0.9787234 ]
|
|
|
|
mean value: 0.9767913333207389
|
|
|
|
key: test_recall
|
|
value: [0.95238095 0.95238095 0.95 1. 1. 1.
|
|
0.95238095 1. 1. 1. ]
|
|
|
|
mean value: 0.9807142857142856
|
|
|
|
key: train_recall
|
|
value: [0.99459459 1. 0.99462366 0.99462366 1. 1.
|
|
1. 1. 1. 0.99459459]
|
|
|
|
mean value: 0.9978436501017146
|
|
|
|
key: test_roc_auc
|
|
value: [0.97619048 0.97619048 0.95119048 0.97619048 1. 1.
|
|
0.92619048 0.95 0.95 0.95 ]
|
|
|
|
mean value: 0.9655952380952381
|
|
|
|
key: train_roc_auc
|
|
value: [0.98648649 0.98648649 0.98109561 0.98379831 0.99189189 0.98648649
|
|
0.98924731 0.98655914 0.99193548 0.98654461]
|
|
|
|
mean value: 0.9870531822144726
|
|
|
|
key: test_jcc
|
|
value: [0.95238095 0.95238095 0.9047619 0.95238095 1. 1.
|
|
0.86956522 0.91304348 0.91304348 0.91304348]
|
|
|
|
mean value: 0.9370600414078675
|
|
|
|
key: train_jcc
|
|
value: [0.97354497 0.97368421 0.96354167 0.96858639 0.98412698 0.97382199
|
|
0.97883598 0.97368421 0.98404255 0.97354497]
|
|
|
|
mean value: 0.9747413927927049
|
|
|
|
MCC on Blind test: 0.9
|
|
|
|
Accuracy on Blind test: 0.95
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.02322245 0.01115775 0.01000214 0.0100019 0.00992441 0.0100491
|
|
0.00989175 0.00991106 0.01019478 0.01086497]
|
|
|
|
mean value: 0.011522030830383301
|
|
|
|
key: score_time
|
|
value: [0.01117277 0.00906587 0.00894356 0.00887871 0.00882888 0.00881004
|
|
0.00894785 0.00873399 0.00964856 0.00930524]
|
|
|
|
mean value: 0.009233546257019044
|
|
|
|
key: test_mcc
|
|
value: [0.57207755 0.52620136 0.36718832 0.72229808 0.7098505 0.6806903
|
|
0.65871309 0.41428571 0.51320273 0.51190476]
|
|
|
|
mean value: 0.5676412422906144
|
|
|
|
key: train_mcc
|
|
value: [0.63807092 0.62969126 0.6558879 0.6516517 0.68216317 0.64716482
|
|
0.68245673 0.68195292 0.68220933 0.66900863]
|
|
|
|
mean value: 0.662025738279246
|
|
|
|
key: test_accuracy
|
|
value: [0.78571429 0.76190476 0.68292683 0.85365854 0.85365854 0.82926829
|
|
0.82926829 0.70731707 0.75609756 0.75609756]
|
|
|
|
mean value: 0.7815911730545877
|
|
|
|
key: train_accuracy
|
|
value: [0.81891892 0.81351351 0.82749326 0.82479784 0.84097035 0.82210243
|
|
0.84097035 0.84097035 0.84097035 0.8328841 ]
|
|
|
|
mean value: 0.8303591462082028
|
|
|
|
key: test_fscore
|
|
value: [0.79069767 0.77272727 0.64864865 0.86363636 0.84210526 0.84444444
|
|
0.8372093 0.71428571 0.77272727 0.76190476]
|
|
|
|
mean value: 0.7848386718276559
|
|
|
|
key: train_fscore
|
|
value: [0.82133333 0.82170543 0.83246073 0.83204134 0.84350133 0.83076923
|
|
0.84350133 0.84097035 0.84266667 0.84020619]
|
|
|
|
mean value: 0.8349155922270581
|
|
|
|
key: test_precision
|
|
value: [0.77272727 0.73913043 0.70588235 0.79166667 0.88888889 0.76
|
|
0.81818182 0.71428571 0.73913043 0.76190476]
|
|
|
|
mean value: 0.7691798345161517
|
|
|
|
key: train_precision
|
|
value: [0.81052632 0.78712871 0.81122449 0.80099502 0.83246073 0.79411765
|
|
0.828125 0.83870968 0.83157895 0.80295567]
|
|
|
|
mean value: 0.8137822213187824
|
|
|
|
key: test_recall
|
|
value: [0.80952381 0.80952381 0.6 0.95 0.8 0.95
|
|
0.85714286 0.71428571 0.80952381 0.76190476]
|
|
|
|
mean value: 0.8061904761904761
|
|
|
|
key: train_recall
|
|
value: [0.83243243 0.85945946 0.85483871 0.8655914 0.85483871 0.87096774
|
|
0.85945946 0.84324324 0.85405405 0.88108108]
|
|
|
|
mean value: 0.8575966288869514
|
|
|
|
key: test_roc_auc
|
|
value: [0.78571429 0.76190476 0.68095238 0.85595238 0.85238095 0.83214286
|
|
0.82857143 0.70714286 0.7547619 0.75595238]
|
|
|
|
mean value: 0.781547619047619
|
|
|
|
key: train_roc_auc
|
|
value: [0.81891892 0.81351351 0.82741935 0.82468759 0.84093287 0.82197036
|
|
0.84102005 0.84097646 0.84100552 0.83301366]
|
|
|
|
mean value: 0.8303458297006684
|
|
|
|
key: test_jcc
|
|
value: [0.65384615 0.62962963 0.48 0.76 0.72727273 0.73076923
|
|
0.72 0.55555556 0.62962963 0.61538462]
|
|
|
|
mean value: 0.6502087542087542
|
|
|
|
key: train_jcc
|
|
value: [0.69683258 0.69736842 0.71300448 0.71238938 0.7293578 0.71052632
|
|
0.7293578 0.7255814 0.7281106 0.72444444]
|
|
|
|
mean value: 0.716697321606543
|
|
|
|
MCC on Blind test: 0.6
|
|
|
|
Accuracy on Blind test: 0.81
|
|
|
|
Model_name: XGBoost
|
|
Model func: XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000...
|
|
interaction_constraints=None, learning_rate=None,
|
|
max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan,
|
|
monotone_constraints=None, n_estimators=100,
|
|
n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None,
|
|
reg_lambda=None, scale_pos_weight=None,
|
|
subsample=None, tree_method=None,
|
|
use_label_encoder=False,
|
|
validate_parameters=None, verbosity=0))])
|
|
|
|
key: fit_time
|
|
value: [0.25485444 0.65282845 0.07063746 0.06406283 0.07283258 0.06812763
|
|
0.06811261 0.07140255 0.07633281 0.44679737]
|
|
|
|
mean value: 0.18459887504577638
|
|
|
|
key: score_time
|
|
value: [0.01287341 0.01114225 0.01090765 0.01067328 0.01123691 0.01061821
|
|
0.01061773 0.01126909 0.01094055 0.01281428]
|
|
|
|
mean value: 0.011309337615966798
|
|
|
|
key: test_mcc
|
|
value: [0.95346259 0.95346259 1. 1. 0.85441771 1.
|
|
0.95238095 1. 0.90649828 0.95227002]
|
|
|
|
mean value: 0.9572492133248716
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.97619048 0.97619048 1. 1. 0.92682927 1.
|
|
0.97560976 1. 0.95121951 0.97560976]
|
|
|
|
mean value: 0.9781649245063879
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.97560976 0.97560976 1. 1. 0.92307692 1.
|
|
0.97560976 1. 0.95454545 0.97674419]
|
|
|
|
mean value: 0.9781195831961572
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 1. 1. 1. 0.94736842 1.
|
|
1. 1. 0.91304348 0.95454545]
|
|
|
|
mean value: 0.9814957353858955
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.95238095 0.95238095 1. 1. 0.9 1.
|
|
0.95238095 1. 1. 1. ]
|
|
|
|
mean value: 0.9757142857142858
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.97619048 0.97619048 1. 1. 0.92619048 1.
|
|
0.97619048 1. 0.95 0.975 ]
|
|
|
|
mean value: 0.9779761904761904
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.95238095 0.95238095 1. 1. 0.85714286 1.
|
|
0.95238095 1. 0.91304348 0.95454545]
|
|
|
|
mean value: 0.9581874647092038
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.88
|
|
|
|
Accuracy on Blind test: 0.95
|
|
|
|
Model_name: LDA
|
|
Model func: LinearDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LinearDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.04435015 0.07989097 0.07348156 0.06565762 0.07407832 0.03800893
|
|
0.08862805 0.09641933 0.09125471 0.06087875]
|
|
|
|
mean value: 0.07126483917236329
|
|
|
|
key: score_time
|
|
value: [0.02085805 0.02275562 0.02071571 0.02121043 0.01224446 0.01214051
|
|
0.02340412 0.02287984 0.02411938 0.0126636 ]
|
|
|
|
mean value: 0.01929917335510254
|
|
|
|
key: test_mcc
|
|
value: [0.9047619 0.81322028 0.95238095 0.7633652 0.95227002 0.95227002
|
|
0.86333169 0.8547619 0.65952381 0.95227002]
|
|
|
|
mean value: 0.8668155793025234
|
|
|
|
key: train_mcc
|
|
value: [0.98379816 0.98918919 0.98921825 0.98921825 0.98921825 0.98921825
|
|
0.99462366 0.98384191 0.9946235 0.9946235 ]
|
|
|
|
mean value: 0.9897572910128184
|
|
|
|
key: test_accuracy
|
|
value: [0.95238095 0.9047619 0.97560976 0.87804878 0.97560976 0.97560976
|
|
0.92682927 0.92682927 0.82926829 0.97560976]
|
|
|
|
mean value: 0.9320557491289199
|
|
|
|
key: train_accuracy
|
|
value: [0.99189189 0.99459459 0.99460916 0.99460916 0.99460916 0.99460916
|
|
0.99730458 0.99191375 0.99730458 0.99730458]
|
|
|
|
mean value: 0.9948750637429883
|
|
|
|
key: test_fscore
|
|
value: [0.95238095 0.9 0.97560976 0.86486486 0.97435897 0.97435897
|
|
0.92307692 0.92682927 0.82926829 0.97674419]
|
|
|
|
mean value: 0.9297492192160371
|
|
|
|
key: train_fscore
|
|
value: [0.99186992 0.99459459 0.99462366 0.99462366 0.99462366 0.99462366
|
|
0.99730458 0.99191375 0.99728997 0.99728997]
|
|
|
|
mean value: 0.9948757411590124
|
|
|
|
key: test_precision
|
|
value: [0.95238095 0.94736842 0.95238095 0.94117647 1. 1.
|
|
1. 0.95 0.85 0.95454545]
|
|
|
|
mean value: 0.9547852250948226
|
|
|
|
key: train_precision
|
|
value: [0.99456522 0.99459459 0.99462366 0.99462366 0.99462366 0.99462366
|
|
0.99462366 0.98924731 1. 1. ]
|
|
|
|
mean value: 0.9951525403383749
|
|
|
|
key: test_recall
|
|
value: [0.95238095 0.85714286 1. 0.8 0.95 0.95
|
|
0.85714286 0.9047619 0.80952381 1. ]
|
|
|
|
mean value: 0.9080952380952381
|
|
|
|
key: train_recall
|
|
value: [0.98918919 0.99459459 0.99462366 0.99462366 0.99462366 0.99462366
|
|
1. 0.99459459 0.99459459 0.99459459]
|
|
|
|
mean value: 0.9946062191223481
|
|
|
|
key: test_roc_auc
|
|
value: [0.95238095 0.9047619 0.97619048 0.87619048 0.975 0.975
|
|
0.92857143 0.92738095 0.8297619 0.975 ]
|
|
|
|
mean value: 0.9320238095238095
|
|
|
|
key: train_roc_auc
|
|
value: [0.99189189 0.99459459 0.99460913 0.99460913 0.99460913 0.99460913
|
|
0.99731183 0.99192095 0.9972973 0.9972973 ]
|
|
|
|
mean value: 0.9948750363266493
|
|
|
|
key: test_jcc
|
|
value: [0.90909091 0.81818182 0.95238095 0.76190476 0.95 0.95
|
|
0.85714286 0.86363636 0.70833333 0.95454545]
|
|
|
|
mean value: 0.872521645021645
|
|
|
|
key: train_jcc
|
|
value: [0.98387097 0.98924731 0.98930481 0.98930481 0.98930481 0.98930481
|
|
0.99462366 0.98395722 0.99459459 0.99459459]
|
|
|
|
mean value: 0.9898107595261295
|
|
|
|
MCC on Blind test: 0.78
|
|
|
|
Accuracy on Blind test: 0.9
|
|
|
|
Model_name: Multinomial
|
|
Model func: MultinomialNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MultinomialNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01397157 0.01064801 0.01116705 0.00983787 0.01101112 0.01056051
|
|
0.01734877 0.01102638 0.01001835 0.01107717]
|
|
|
|
mean value: 0.011666679382324218
|
|
|
|
key: score_time
|
|
value: [0.01169801 0.0106709 0.01013255 0.00997734 0.00976014 0.00959802
|
|
0.01371956 0.00910187 0.00948548 0.00973678]
|
|
|
|
mean value: 0.010388064384460449
|
|
|
|
key: test_mcc
|
|
value: [0.61904762 0.53357838 0.56086079 0.58066054 0.65871309 0.51551459
|
|
0.46428571 0.7197263 0.65871309 0.51966679]
|
|
|
|
mean value: 0.5830766911453171
|
|
|
|
key: train_mcc
|
|
value: [0.62458505 0.63192977 0.66817939 0.6027138 0.62040699 0.62388021
|
|
0.58080121 0.62919597 0.66395875 0.63891466]
|
|
|
|
mean value: 0.6284565795023681
|
|
|
|
key: test_accuracy
|
|
value: [0.80952381 0.76190476 0.7804878 0.7804878 0.82926829 0.75609756
|
|
0.73170732 0.85365854 0.82926829 0.75609756]
|
|
|
|
mean value: 0.7888501742160279
|
|
|
|
key: train_accuracy
|
|
value: [0.81081081 0.81351351 0.8328841 0.80053908 0.80862534 0.81132075
|
|
0.78975741 0.81401617 0.83018868 0.81671159]
|
|
|
|
mean value: 0.8128367451008961
|
|
|
|
key: test_fscore
|
|
value: [0.80952381 0.7826087 0.76923077 0.8 0.82051282 0.76190476
|
|
0.73170732 0.86956522 0.8372093 0.7826087 ]
|
|
|
|
mean value: 0.7964871389266566
|
|
|
|
key: train_fscore
|
|
value: [0.81958763 0.82442748 0.84020619 0.80829016 0.81841432 0.81770833
|
|
0.79581152 0.81889764 0.83804627 0.82741117]
|
|
|
|
mean value: 0.8208800702499555
|
|
|
|
key: test_precision
|
|
value: [0.80952381 0.72 0.78947368 0.72 0.84210526 0.72727273
|
|
0.75 0.8 0.81818182 0.72 ]
|
|
|
|
mean value: 0.7696557302346776
|
|
|
|
key: train_precision
|
|
value: [0.78325123 0.77884615 0.80693069 0.78 0.7804878 0.79292929
|
|
0.7715736 0.79591837 0.79901961 0.77990431]
|
|
|
|
mean value: 0.7868861061720982
|
|
|
|
key: test_recall
|
|
value: [0.80952381 0.85714286 0.75 0.9 0.8 0.8
|
|
0.71428571 0.95238095 0.85714286 0.85714286]
|
|
|
|
mean value: 0.8297619047619047
|
|
|
|
key: train_recall
|
|
value: [0.85945946 0.87567568 0.87634409 0.83870968 0.86021505 0.84408602
|
|
0.82162162 0.84324324 0.88108108 0.88108108]
|
|
|
|
mean value: 0.858151700087184
|
|
|
|
key: test_roc_auc
|
|
value: [0.80952381 0.76190476 0.7797619 0.78333333 0.82857143 0.75714286
|
|
0.73214286 0.85119048 0.82857143 0.75357143]
|
|
|
|
mean value: 0.7885714285714286
|
|
|
|
key: train_roc_auc
|
|
value: [0.81081081 0.81351351 0.83276664 0.80043592 0.80848591 0.8112322
|
|
0.78984307 0.81409474 0.83032549 0.81688463]
|
|
|
|
mean value: 0.812839290903807
|
|
|
|
key: test_jcc
|
|
value: [0.68 0.64285714 0.625 0.66666667 0.69565217 0.61538462
|
|
0.57692308 0.76923077 0.72 0.64285714]
|
|
|
|
mean value: 0.6634571587832457
|
|
|
|
key: train_jcc
|
|
value: [0.69432314 0.7012987 0.72444444 0.67826087 0.69264069 0.69162996
|
|
0.66086957 0.69333333 0.72123894 0.70562771]
|
|
|
|
mean value: 0.6963667350232523
|
|
|
|
MCC on Blind test: 0.62
|
|
|
|
Accuracy on Blind test: 0.83
|
|
|
|
Model_name: Passive Aggresive
|
|
Model func: PassiveAggressiveClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
PassiveAggressiveClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02573824 0.02512217 0.02389956 0.01943803 0.01827455 0.02354956
|
|
0.02093744 0.02193046 0.02814054 0.01929259]
|
|
|
|
mean value: 0.022632312774658204
|
|
|
|
key: score_time
|
|
value: [0.01042128 0.01171613 0.01189804 0.01167703 0.01169205 0.01298094
|
|
0.0117383 0.01208544 0.01217604 0.01196671]
|
|
|
|
mean value: 0.011835193634033203
|
|
|
|
key: test_mcc
|
|
value: [0.90889326 0.8660254 0.85441771 0.90692382 0.81975606 1.
|
|
0.90238095 0.95238095 0.80817439 0.77831178]
|
|
|
|
mean value: 0.8797264335200057
|
|
|
|
key: train_mcc
|
|
value: [0.97860715 0.98391316 0.9946235 0.93726212 0.94697838 0.97866283
|
|
0.93728335 0.98395537 1. 0.87323811]
|
|
|
|
mean value: 0.9614523980709841
|
|
|
|
key: test_accuracy
|
|
value: [0.95238095 0.92857143 0.92682927 0.95121951 0.90243902 1.
|
|
0.95121951 0.97560976 0.90243902 0.87804878]
|
|
|
|
mean value: 0.9368757259001161
|
|
|
|
key: train_accuracy
|
|
value: [0.98918919 0.99189189 0.99730458 0.96765499 0.97304582 0.98921833
|
|
0.96765499 0.99191375 1. 0.93261456]
|
|
|
|
mean value: 0.9800488089167334
|
|
|
|
key: test_fscore
|
|
value: [0.95 0.92307692 0.92307692 0.95238095 0.88888889 1.
|
|
0.95238095 0.97560976 0.90909091 0.89361702]
|
|
|
|
mean value: 0.9368122326269706
|
|
|
|
key: train_fscore
|
|
value: [0.98907104 0.99182561 0.99731903 0.96875 0.97252747 0.9893617
|
|
0.96858639 0.99182561 1. 0.93670886]
|
|
|
|
mean value: 0.9805975722111132
|
|
|
|
key: test_precision
|
|
value: [1. 1. 0.94736842 0.90909091 1. 1.
|
|
0.95238095 1. 0.86956522 0.80769231]
|
|
|
|
mean value: 0.9486097807608105
|
|
|
|
key: train_precision
|
|
value: [1. 1. 0.99465241 0.93939394 0.99438202 0.97894737
|
|
0.93908629 1. 1. 0.88095238]
|
|
|
|
mean value: 0.9727414412072639
|
|
|
|
key: test_recall
|
|
value: [0.9047619 0.85714286 0.9 1. 0.8 1.
|
|
0.95238095 0.95238095 0.95238095 1. ]
|
|
|
|
mean value: 0.9319047619047619
|
|
|
|
key: train_recall
|
|
value: [0.97837838 0.98378378 1. 1. 0.9516129 1.
|
|
1. 0.98378378 1. 1. ]
|
|
|
|
mean value: 0.9897558849171753
|
|
|
|
key: test_roc_auc
|
|
value: [0.95238095 0.92857143 0.92619048 0.95238095 0.9 1.
|
|
0.95119048 0.97619048 0.90119048 0.875 ]
|
|
|
|
mean value: 0.9363095238095238
|
|
|
|
key: train_roc_auc
|
|
value: [0.98918919 0.99189189 0.9972973 0.96756757 0.97310375 0.98918919
|
|
0.96774194 0.99189189 1. 0.9327957 ]
|
|
|
|
mean value: 0.980066841034583
|
|
|
|
key: test_jcc
|
|
value: [0.9047619 0.85714286 0.85714286 0.90909091 0.8 1.
|
|
0.90909091 0.95238095 0.83333333 0.80769231]
|
|
|
|
mean value: 0.8830636030636031
|
|
|
|
key: train_jcc
|
|
value: [0.97837838 0.98378378 0.99465241 0.93939394 0.94652406 0.97894737
|
|
0.93908629 0.98378378 1. 0.88095238]
|
|
|
|
mean value: 0.9625502399717798
|
|
|
|
MCC on Blind test: 0.8
|
|
|
|
Accuracy on Blind test: 0.91
|
|
|
|
Model_name: Stochastic GDescent
|
|
Model func: SGDClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SGDClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01839256 0.03484535 0.01788545 0.01615334 0.01706839 0.01675463
|
|
0.02165723 0.01748395 0.01716471 0.01620626]
|
|
|
|
mean value: 0.019361186027526855
|
|
|
|
key: score_time
|
|
value: [0.01203561 0.01304317 0.0117414 0.01175261 0.01182771 0.01171184
|
|
0.01184654 0.01171041 0.01172614 0.01175642]
|
|
|
|
mean value: 0.011915183067321778
|
|
|
|
key: test_mcc
|
|
value: [0.95346259 0.70710678 0.67700771 0.90692382 0.85441771 0.74124932
|
|
0.59335232 0.73786479 0.62048368 0.8213423 ]
|
|
|
|
mean value: 0.7613211022538029
|
|
|
|
key: train_mcc
|
|
value: [0.96807684 0.92195445 0.93618785 0.94103803 0.95709803 0.71475641
|
|
0.72814281 0.8780389 0.62242988 0.74956267]
|
|
|
|
mean value: 0.8417285864866383
|
|
|
|
key: test_accuracy
|
|
value: [0.97619048 0.83333333 0.82926829 0.95121951 0.92682927 0.85365854
|
|
0.7804878 0.85365854 0.7804878 0.90243902]
|
|
|
|
mean value: 0.8687572590011614
|
|
|
|
key: train_accuracy
|
|
value: [0.98378378 0.95945946 0.96765499 0.9703504 0.97843666 0.83827493
|
|
0.84636119 0.93530997 0.77897574 0.85983827]
|
|
|
|
mean value: 0.9118445399577475
|
|
|
|
key: test_fscore
|
|
value: [0.97560976 0.8 0.8 0.95238095 0.92307692 0.86956522
|
|
0.81632653 0.875 0.82352941 0.89473684]
|
|
|
|
mean value: 0.8730225633428955
|
|
|
|
key: train_fscore
|
|
value: [0.98404255 0.95774648 0.96703297 0.97082228 0.97826087 0.86111111
|
|
0.86651054 0.93908629 0.81858407 0.83647799]
|
|
|
|
mean value: 0.9179675152216906
|
|
|
|
key: test_precision
|
|
value: [1. 1. 0.93333333 0.90909091 0.94736842 0.76923077
|
|
0.71428571 0.77777778 0.7 1. ]
|
|
|
|
mean value: 0.8751086924771135
|
|
|
|
key: train_precision
|
|
value: [0.96858639 1. 0.98876404 0.95811518 0.98901099 0.75609756
|
|
0.76446281 0.88516746 0.6928839 1. ]
|
|
|
|
mean value: 0.9003088334774321
|
|
|
|
key: test_recall
|
|
value: [0.95238095 0.66666667 0.7 1. 0.9 1.
|
|
0.95238095 1. 1. 0.80952381]
|
|
|
|
mean value: 0.8980952380952381
|
|
|
|
key: train_recall
|
|
value: [1. 0.91891892 0.94623656 0.98387097 0.96774194 1.
|
|
1. 1. 1. 0.71891892]
|
|
|
|
mean value: 0.9535687300203429
|
|
|
|
key: test_roc_auc
|
|
value: [0.97619048 0.83333333 0.82619048 0.95238095 0.92619048 0.85714286
|
|
0.77619048 0.85 0.775 0.9047619 ]
|
|
|
|
mean value: 0.8677380952380952
|
|
|
|
key: train_roc_auc
|
|
value: [0.98378378 0.95945946 0.96771287 0.97031386 0.97846556 0.83783784
|
|
0.84677419 0.93548387 0.77956989 0.85945946]
|
|
|
|
mean value: 0.9118860796280152
|
|
|
|
key: test_jcc
|
|
value: [0.95238095 0.66666667 0.66666667 0.90909091 0.85714286 0.76923077
|
|
0.68965517 0.77777778 0.7 0.80952381]
|
|
|
|
mean value: 0.7798135580894201
|
|
|
|
key: train_jcc
|
|
value: [0.96858639 0.91891892 0.93617021 0.94329897 0.95744681 0.75609756
|
|
0.76446281 0.88516746 0.6928839 0.71891892]
|
|
|
|
mean value: 0.8541951945760038
|
|
|
|
MCC on Blind test: 0.75
|
|
|
|
Accuracy on Blind test: 0.88
|
|
|
|
Model_name: AdaBoost Classifier
|
|
Model func: AdaBoostClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', AdaBoostClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.18290973 0.17475319 0.17721367 0.17609525 0.176162 0.1720438
|
|
0.18470693 0.16746712 0.15189815 0.15740561]
|
|
|
|
mean value: 0.17206554412841796
|
|
|
|
key: score_time
|
|
value: [0.01708293 0.016047 0.01525402 0.01880813 0.01527667 0.01667857
|
|
0.02105594 0.01652122 0.01655769 0.01626611]
|
|
|
|
mean value: 0.016954827308654784
|
|
|
|
key: test_mcc
|
|
value: [0.95346259 1. 1. 1. 0.95238095 0.95227002
|
|
0.95238095 0.95238095 0.90649828 0.95227002]
|
|
|
|
mean value: 0.9621643756089681
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.97619048 1. 1. 1. 0.97560976 0.97560976
|
|
0.97560976 0.97560976 0.95121951 0.97560976]
|
|
|
|
mean value: 0.9805458768873403
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.97560976 1. 1. 1. 0.97560976 0.97435897
|
|
0.97560976 0.97560976 0.95454545 0.97674419]
|
|
|
|
mean value: 0.9808087639341184
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 1. 1. 1. 0.95238095 1.
|
|
1. 1. 0.91304348 0.95454545]
|
|
|
|
mean value: 0.9819969885187276
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.95238095 1. 1. 1. 1. 0.95
|
|
0.95238095 0.95238095 1. 1. ]
|
|
|
|
mean value: 0.9807142857142856
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.97619048 1. 1. 1. 0.97619048 0.975
|
|
0.97619048 0.97619048 0.95 0.975 ]
|
|
|
|
mean value: 0.9804761904761905
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.95238095 1. 1. 1. 0.95238095 0.95
|
|
0.95238095 0.95238095 0.91304348 0.95454545]
|
|
|
|
mean value: 0.9627112742330133
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.9
|
|
|
|
Accuracy on Blind test: 0.95
|
|
|
|
Model_name: Bagging Classifier
|
|
Model func: BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
BaggingClassifier(n_jobs=10, oob_score=True,
|
|
random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.06012321 0.05812716 0.06727004 0.06751275 0.05394554 0.05166769
|
|
0.07125807 0.0593648 0.05061197 0.06649494]
|
|
|
|
mean value: 0.060637617111206056
|
|
|
|
key: score_time
|
|
value: [0.03201938 0.02349854 0.02908587 0.03036594 0.02365637 0.02560067
|
|
0.024652 0.02214336 0.02164698 0.02623129]
|
|
|
|
mean value: 0.025890040397644042
|
|
|
|
key: test_mcc
|
|
value: [0.95346259 1. 0.95238095 1. 0.90238095 0.95227002
|
|
0.95238095 0.95238095 0.90649828 0.80907152]
|
|
|
|
mean value: 0.9380826209946751
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 0.98384144 0.99462366 0.99462366 1.
|
|
0.99462366 1. 0.9946235 0.99462366]
|
|
|
|
mean value: 0.9956959560990601
|
|
|
|
key: test_accuracy
|
|
value: [0.97619048 1. 0.97560976 1. 0.95121951 0.97560976
|
|
0.97560976 0.97560976 0.95121951 0.90243902]
|
|
|
|
mean value: 0.9683507549361208
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 0.99191375 0.99730458 0.99730458 1.
|
|
0.99730458 1. 0.99730458 0.99730458]
|
|
|
|
mean value: 0.997843665768194
|
|
|
|
key: test_fscore
|
|
value: [0.97560976 1. 0.97560976 1. 0.95 0.97435897
|
|
0.97560976 0.97560976 0.95454545 0.9 ]
|
|
|
|
mean value: 0.9681343453294673
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 0.9919571 0.99730458 0.99730458 1.
|
|
0.99730458 1. 0.99728997 0.99730458]
|
|
|
|
mean value: 0.997846540629834
|
|
|
|
key: test_precision
|
|
value: [1. 1. 0.95238095 1. 0.95 1.
|
|
1. 1. 0.91304348 0.94736842]
|
|
|
|
mean value: 0.9762792851694453
|
|
|
|
key: train_precision
|
|
value: [1. 1. 0.98930481 1. 1. 1.
|
|
0.99462366 1. 1. 0.99462366]
|
|
|
|
mean value: 0.9978552124662181
|
|
|
|
key: test_recall
|
|
value: [0.95238095 1. 1. 1. 0.95 0.95
|
|
0.95238095 0.95238095 1. 0.85714286]
|
|
|
|
mean value: 0.9614285714285714
|
|
|
|
key: train_recall
|
|
value: [1. 1. 0.99462366 0.99462366 0.99462366 1.
|
|
1. 1. 0.99459459 1. ]
|
|
|
|
mean value: 0.997846556233653
|
|
|
|
key: test_roc_auc
|
|
value: [0.97619048 1. 0.97619048 1. 0.95119048 0.975
|
|
0.97619048 0.97619048 0.95 0.90357143]
|
|
|
|
mean value: 0.968452380952381
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 0.99190642 0.99731183 0.99731183 1.
|
|
0.99731183 1. 0.9972973 0.99731183]
|
|
|
|
mean value: 0.9978451031676838
|
|
|
|
key: test_jcc
|
|
value: [0.95238095 1. 0.95238095 1. 0.9047619 0.95
|
|
0.95238095 0.95238095 0.91304348 0.81818182]
|
|
|
|
mean value: 0.9395511010728401
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 0.98404255 0.99462366 0.99462366 1.
|
|
0.99462366 1. 0.99459459 0.99462366]
|
|
|
|
mean value: 0.9957131771441998
|
|
|
|
MCC on Blind test: 0.87
|
|
|
|
Accuracy on Blind test: 0.94
|
|
|
|
Model_name: Gaussian Process
|
|
Model func: GaussianProcessClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianProcessClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.09891081 0.11998367 0.09824228 0.12802958 0.08986497 0.14036012
|
|
0.14788032 0.11883092 0.14361811 0.19772005]
|
|
|
|
mean value: 0.12834408283233642
|
|
|
|
key: score_time
|
|
value: [0.0226264 0.02250314 0.01391673 0.02268267 0.01394129 0.02342319
|
|
0.02288437 0.02269959 0.02311373 0.02368975]
|
|
|
|
mean value: 0.021148085594177246
|
|
|
|
key: test_mcc
|
|
value: [0.68640647 0.71754731 0.57570364 0.90238095 0.7197263 0.75714286
|
|
0.65952381 0.86333169 0.6133669 0.76500781]
|
|
|
|
mean value: 0.7260137745399882
|
|
|
|
key: train_mcc
|
|
value: [0.97310093 0.97843556 0.978494 0.98927606 0.978494 0.96765475
|
|
0.97849275 0.97849275 0.97317174 0.97305937]
|
|
|
|
mean value: 0.9768671913757482
|
|
|
|
key: test_accuracy
|
|
value: [0.83333333 0.85714286 0.7804878 0.95121951 0.85365854 0.87804878
|
|
0.82926829 0.92682927 0.80487805 0.87804878]
|
|
|
|
mean value: 0.8592915214866435
|
|
|
|
key: train_accuracy
|
|
value: [0.98648649 0.98918919 0.98921833 0.99460916 0.98921833 0.98382749
|
|
0.98921833 0.98921833 0.98652291 0.98652291]
|
|
|
|
mean value: 0.9884031470823924
|
|
|
|
key: test_fscore
|
|
value: [0.81081081 0.85 0.74285714 0.95 0.83333333 0.87804878
|
|
0.82926829 0.92307692 0.8 0.87179487]
|
|
|
|
mean value: 0.8489190155043814
|
|
|
|
key: train_fscore
|
|
value: [0.98637602 0.98913043 0.98918919 0.99459459 0.98918919 0.98387097
|
|
0.98913043 0.98913043 0.98637602 0.98644986]
|
|
|
|
mean value: 0.988343715315811
|
|
|
|
key: test_precision
|
|
value: [0.9375 0.89473684 0.86666667 0.95 0.9375 0.85714286
|
|
0.85 1. 0.84210526 0.94444444]
|
|
|
|
mean value: 0.9080096073517125
|
|
|
|
key: train_precision
|
|
value: [0.99450549 0.99453552 0.99456522 1. 0.99456522 0.98387097
|
|
0.99453552 0.99453552 0.99450549 0.98913043]
|
|
|
|
mean value: 0.9934749383695191
|
|
|
|
key: test_recall
|
|
value: [0.71428571 0.80952381 0.65 0.95 0.75 0.9
|
|
0.80952381 0.85714286 0.76190476 0.80952381]
|
|
|
|
mean value: 0.8011904761904762
|
|
|
|
key: train_recall
|
|
value: [0.97837838 0.98378378 0.98387097 0.98924731 0.98387097 0.98387097
|
|
0.98378378 0.98378378 0.97837838 0.98378378]
|
|
|
|
mean value: 0.9832752106945656
|
|
|
|
key: test_roc_auc
|
|
value: [0.83333333 0.85714286 0.77738095 0.95119048 0.85119048 0.87857143
|
|
0.8297619 0.92857143 0.80595238 0.8797619 ]
|
|
|
|
mean value: 0.8592857142857143
|
|
|
|
key: train_roc_auc
|
|
value: [0.98648649 0.98918919 0.98923278 0.99462366 0.98923278 0.98382738
|
|
0.98920372 0.98920372 0.98650102 0.98651555]
|
|
|
|
mean value: 0.9884016274338856
|
|
|
|
key: test_jcc
|
|
value: [0.68181818 0.73913043 0.59090909 0.9047619 0.71428571 0.7826087
|
|
0.70833333 0.85714286 0.66666667 0.77272727]
|
|
|
|
mean value: 0.7418384152079804
|
|
|
|
key: train_jcc
|
|
value: [0.97311828 0.97849462 0.97860963 0.98924731 0.97860963 0.96825397
|
|
0.97849462 0.97849462 0.97311828 0.97326203]
|
|
|
|
mean value: 0.9769702993611912
|
|
|
|
MCC on Blind test: 0.37
|
|
|
|
Accuracy on Blind test: 0.71
|
|
|
|
Model_name: Gradient Boosting
|
|
Model func: GradientBoostingClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GradientBoostingClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.60291195 0.56733131 0.5522635 0.54364038 0.55846596 0.54851103
|
|
0.51982498 0.54667163 0.54160666 0.54516959]
|
|
|
|
mean value: 0.5526396989822387
|
|
|
|
key: score_time
|
|
value: [0.00946116 0.00969267 0.00912714 0.00913095 0.00936699 0.00920463
|
|
0.00918961 0.0092113 0.00943303 0.00921965]
|
|
|
|
mean value: 0.009303712844848632
|
|
|
|
key: test_mcc
|
|
value: [1. 0.95346259 1. 1. 1. 1.
|
|
0.95238095 1. 0.90649828 0.95227002]
|
|
|
|
mean value: 0.9764611836143962
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [1. 0.97619048 1. 1. 1. 1.
|
|
0.97560976 1. 0.95121951 0.97560976]
|
|
|
|
mean value: 0.987862950058072
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [1. 0.97560976 1. 1. 1. 1.
|
|
0.97560976 1. 0.95454545 0.97674419]
|
|
|
|
mean value: 0.9882509152787088
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 1. 1. 1. 1. 1.
|
|
1. 1. 0.91304348 0.95454545]
|
|
|
|
mean value: 0.9867588932806324
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 0.95238095 1. 1. 1. 1.
|
|
0.95238095 1. 1. 1. ]
|
|
|
|
mean value: 0.9904761904761905
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [1. 0.97619048 1. 1. 1. 1.
|
|
0.97619048 1. 0.95 0.975 ]
|
|
|
|
mean value: 0.9877380952380952
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [1. 0.95238095 1. 1. 1. 1.
|
|
0.95238095 1. 0.91304348 0.95454545]
|
|
|
|
mean value: 0.9772350837568229
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.86
|
|
|
|
Accuracy on Blind test: 0.94
|
|
|
|
Model_name: QDA
|
|
Model func: QuadraticDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', QuadraticDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.03106356 0.02730608 0.02864671 0.02820945 0.02958632 0.06434035
|
|
0.02798939 0.09193873 0.05388188 0.03148031]
|
|
|
|
mean value: 0.0414442777633667
|
|
|
|
key: score_time
|
|
value: [0.01246595 0.01286077 0.01365685 0.01574326 0.0151403 0.01277018
|
|
0.0197835 0.01280165 0.01409435 0.01528525]
|
|
|
|
mean value: 0.014460206031799316
|
|
|
|
key: test_mcc
|
|
value: [0.67357531 0.72760688 0.74124932 0.86333169 0.78072006 0.70714286
|
|
0.61969655 0.65871309 0.66432098 0.7098505 ]
|
|
|
|
mean value: 0.7146207238265427
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.83333333 0.85714286 0.85365854 0.92682927 0.87804878 0.85365854
|
|
0.80487805 0.82926829 0.82926829 0.85365854]
|
|
|
|
mean value: 0.8519744483159117
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.84444444 0.86956522 0.86956522 0.93023256 0.88888889 0.85
|
|
0.82608696 0.8372093 0.84444444 0.86363636]
|
|
|
|
mean value: 0.8624073393183606
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.79166667 0.8 0.76923077 0.86956522 0.8 0.85
|
|
0.76 0.81818182 0.79166667 0.82608696]
|
|
|
|
mean value: 0.8076398094658964
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.9047619 0.95238095 1. 1. 1. 0.85
|
|
0.9047619 0.85714286 0.9047619 0.9047619 ]
|
|
|
|
mean value: 0.9278571428571428
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.83333333 0.85714286 0.85714286 0.92857143 0.88095238 0.85357143
|
|
0.80238095 0.82857143 0.82738095 0.85238095]
|
|
|
|
mean value: 0.8521428571428571
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.73076923 0.76923077 0.76923077 0.86956522 0.8 0.73913043
|
|
0.7037037 0.72 0.73076923 0.76 ]
|
|
|
|
mean value: 0.7592399355877617
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.21
|
|
|
|
Accuracy on Blind test: 0.69
|
|
|
|
Model_name: Ridge Classifier
|
|
Model func: RidgeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02879977 0.03616357 0.03607559 0.03617406 0.03616214 0.03645635
|
|
0.04655337 0.04668188 0.03914762 0.03796554]
|
|
|
|
mean value: 0.038017988204956055
|
|
|
|
key: score_time
|
|
value: [0.01821136 0.02122855 0.02009559 0.02300406 0.02157331 0.02307343
|
|
0.0226903 0.0217855 0.02382231 0.02646971]
|
|
|
|
mean value: 0.02219541072845459
|
|
|
|
key: test_mcc
|
|
value: [0.95346259 0.85811633 0.71121921 0.90649828 0.95227002 1.
|
|
0.90692382 0.86240942 0.7633652 0.86240942]
|
|
|
|
mean value: 0.8776674275412332
|
|
|
|
key: train_mcc
|
|
value: [0.95698047 0.97837838 0.96787795 0.97317174 0.96771006 0.96261094
|
|
0.98384191 0.96261632 0.97317407 0.96788166]
|
|
|
|
mean value: 0.9694243488227355
|
|
|
|
key: test_accuracy
|
|
value: [0.97619048 0.92857143 0.85365854 0.95121951 0.97560976 1.
|
|
0.95121951 0.92682927 0.87804878 0.92682927]
|
|
|
|
mean value: 0.9368176538908246
|
|
|
|
key: train_accuracy
|
|
value: [0.97837838 0.98918919 0.98382749 0.98652291 0.98382749 0.98113208
|
|
0.99191375 0.98113208 0.98652291 0.98382749]
|
|
|
|
mean value: 0.9846273767028484
|
|
|
|
key: test_fscore
|
|
value: [0.97560976 0.92682927 0.85714286 0.94736842 0.97435897 1.
|
|
0.95 0.93333333 0.88888889 0.93333333]
|
|
|
|
mean value: 0.9386864832500262
|
|
|
|
key: train_fscore
|
|
value: [0.97860963 0.98918919 0.98404255 0.98666667 0.98395722 0.98143236
|
|
0.99191375 0.98133333 0.98659517 0.98395722]
|
|
|
|
mean value: 0.984769708818797
|
|
|
|
key: test_precision
|
|
value: [1. 0.95 0.81818182 1. 1. 1.
|
|
1. 0.875 0.83333333 0.875 ]
|
|
|
|
mean value: 0.9351515151515152
|
|
|
|
key: train_precision
|
|
value: [0.96825397 0.98918919 0.97368421 0.97883598 0.9787234 0.96858639
|
|
0.98924731 0.96842105 0.9787234 0.97354497]
|
|
|
|
mean value: 0.9767209880755154
|
|
|
|
key: test_recall
|
|
value: [0.95238095 0.9047619 0.9 0.9 0.95 1.
|
|
0.9047619 1. 0.95238095 1. ]
|
|
|
|
mean value: 0.9464285714285714
|
|
|
|
key: train_recall
|
|
value: /home/tanu/git/LSHTM_analysis/scripts/ml/./katg_7030.py:196: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
rouC_CT.sort_values(by = ['test_mcc'], ascending = False, inplace = True)
|
|
/home/tanu/git/LSHTM_analysis/scripts/ml/./katg_7030.py:199: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
rouC_BT.sort_values(by = ['bts_mcc'], ascending = False, inplace = True)
|
|
[0.98918919 0.98918919 0.99462366 0.99462366 0.98924731 0.99462366
|
|
0.99459459 0.99459459 0.99459459 0.99459459]
|
|
|
|
mean value: 0.992987503632665
|
|
|
|
key: test_roc_auc
|
|
value: [0.97619048 0.92857143 0.8547619 0.95 0.975 1.
|
|
0.95238095 0.925 0.87619048 0.925 ]
|
|
|
|
mean value: 0.9363095238095238
|
|
|
|
key: train_roc_auc
|
|
value: [0.97837838 0.98918919 0.98379831 0.98650102 0.98381285 0.98109561
|
|
0.99192095 0.98116827 0.98654461 0.98385644]
|
|
|
|
mean value: 0.9846265620459169
|
|
|
|
key: test_jcc
|
|
value: [0.95238095 0.86363636 0.75 0.9 0.95 1.
|
|
0.9047619 0.875 0.8 0.875 ]
|
|
|
|
mean value: 0.8870779220779221
|
|
|
|
key: train_jcc
|
|
value: [0.95811518 0.97860963 0.96858639 0.97368421 0.96842105 0.96354167
|
|
0.98395722 0.96335079 0.97354497 0.96842105]
|
|
|
|
mean value: 0.9700232156941843
|
|
|
|
MCC on Blind test: 0.84
|
|
|
|
Accuracy on Blind test: 0.93
|
|
|
|
Model_name: Ridge ClassifierCV
|
|
Model func: RidgeClassifierCV(cv=10)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifierCV(cv=10))])
|
|
|
|
key: fit_time
|
|
value: [0.27132845 0.3317194 0.22286606 0.19475937 0.26474047 0.25768924
|
|
0.47041345 0.29687142 0.2708199 0.24366689]
|
|
|
|
mean value: 0.28248746395111085
|
|
|
|
key: score_time
|
|
value: [0.01650262 0.02799678 0.01505399 0.01735926 0.02323914 0.02022839
|
|
0.02487922 0.02377343 0.02366781 0.01204443]
|
|
|
|
mean value: 0.020474505424499512
|
|
|
|
key: test_mcc
|
|
value: [0.95346259 0.85811633 0.71121921 0.90649828 0.95227002 1.
|
|
0.90692382 0.80817439 0.7633652 0.86240942]
|
|
|
|
mean value: 0.8722439251771972
|
|
|
|
key: train_mcc
|
|
value: [0.95698047 0.97837838 0.96787795 0.97317174 0.96771006 0.96261094
|
|
0.98384191 0.96788166 0.97317407 0.96788166]
|
|
|
|
mean value: 0.96995088248731
|
|
|
|
key: test_accuracy
|
|
value: [0.97619048 0.92857143 0.85365854 0.95121951 0.97560976 1.
|
|
0.95121951 0.90243902 0.87804878 0.92682927]
|
|
|
|
mean value: 0.9343786295005807
|
|
|
|
key: train_accuracy
|
|
value: [0.97837838 0.98918919 0.98382749 0.98652291 0.98382749 0.98113208
|
|
0.99191375 0.98382749 0.98652291 0.98382749]
|
|
|
|
mean value: 0.9848969184818241
|
|
|
|
key: test_fscore
|
|
value: [0.97560976 0.92682927 0.85714286 0.94736842 0.97435897 1.
|
|
0.95 0.90909091 0.88888889 0.93333333]
|
|
|
|
mean value: 0.9362622408257838
|
|
|
|
key: train_fscore
|
|
value: [0.97860963 0.98918919 0.98404255 0.98666667 0.98395722 0.98143236
|
|
0.99191375 0.98395722 0.98659517 0.98395722]
|
|
|
|
mean value: 0.9850320974105973
|
|
|
|
key: test_precision
|
|
value: [1. 0.95 0.81818182 1. 1. 1.
|
|
1. 0.86956522 0.83333333 0.875 ]
|
|
|
|
mean value: 0.9346080368906455
|
|
|
|
key: train_precision
|
|
value: [0.96825397 0.98918919 0.97368421 0.97883598 0.9787234 0.96858639
|
|
0.98924731 0.97354497 0.9787234 0.97354497]
|
|
|
|
mean value: 0.9772333801668549
|
|
|
|
key: test_recall
|
|
value: [0.95238095 0.9047619 0.9 0.9 0.95 1.
|
|
0.9047619 0.95238095 0.95238095 1. ]
|
|
|
|
mean value: 0.9416666666666667
|
|
|
|
key: train_recall
|
|
value: [0.98918919 0.98918919 0.99462366 0.99462366 0.98924731 0.99462366
|
|
0.99459459 0.99459459 0.99459459 0.99459459]
|
|
|
|
mean value: 0.992987503632665
|
|
|
|
key: test_roc_auc
|
|
value: [0.97619048 0.92857143 0.8547619 0.95 0.975 1.
|
|
0.95238095 0.90119048 0.87619048 0.925 ]
|
|
|
|
mean value: 0.9339285714285714
|
|
|
|
key: train_roc_auc
|
|
value: [0.97837838 0.98918919 0.98379831 0.98650102 0.98381285 0.98109561
|
|
0.99192095 0.98385644 0.98654461 0.98385644]
|
|
|
|
mean value: 0.984895379250218
|
|
|
|
key: test_jcc
|
|
value: [0.95238095 0.86363636 0.75 0.9 0.95 1.
|
|
0.9047619 0.83333333 0.8 0.875 ]
|
|
|
|
mean value: 0.8829112554112554
|
|
|
|
key: train_jcc
|
|
value: [0.95811518 0.97860963 0.96858639 0.97368421 0.96842105 0.96354167
|
|
0.98395722 0.96842105 0.97354497 0.96842105]
|
|
|
|
mean value: 0.9705302424233108
|
|
|
|
MCC on Blind test: 0.84
|
|
|
|
Accuracy on Blind test: 0.93
|