19863 lines
987 KiB
Text
19863 lines
987 KiB
Text
/home/tanu/git/LSHTM_analysis/scripts/ml/ml_data_cd_sl.py:548: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
mask_check.sort_values(by = ['ligand_distance'], ascending = True, inplace = True)
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/xgboost/compat.py:36: FutureWarning: pandas.Int64Index is deprecated and will be removed from pandas in a future version. Use pandas.Index with the appropriate dtype instead.
|
|
from pandas import MultiIndex, Int64Index
|
|
1.22.4
|
|
1.4.1
|
|
|
|
aaindex_df contains non-numerical data
|
|
|
|
Total no. of non-numerial columns: 2
|
|
|
|
Selecting numerical data only
|
|
|
|
PASS: successfully selected numerical columns only for aaindex_df
|
|
|
|
Now checking for NA in the remaining aaindex_cols
|
|
|
|
Counting aaindex_df cols with NA
|
|
ncols with NA: 4 columns
|
|
Dropping these...
|
|
Original ncols: 127
|
|
|
|
Revised df ncols: 123
|
|
|
|
Checking NA in revised df...
|
|
|
|
PASS: cols with NA successfully dropped from aaindex_df
|
|
Proceeding with combining aa_df with other features_df
|
|
|
|
PASS: ncols match
|
|
Expected ncols: 123
|
|
Got: 123
|
|
|
|
Total no. of columns in clean aa_df: 123
|
|
|
|
Proceeding to merge, expected nrows in merged_df: 1133
|
|
|
|
PASS: my_features_df and aa_df successfully combined
|
|
nrows: 1133
|
|
ncols: 274
|
|
count of NULL values before imputation
|
|
|
|
or_mychisq 339
|
|
log10_or_mychisq 339
|
|
dtype: int64
|
|
count of NULL values AFTER imputation
|
|
|
|
mutationinformation 0
|
|
or_rawI 0
|
|
logorI 0
|
|
dtype: int64
|
|
|
|
PASS: OR values imputed, data ready for ML
|
|
|
|
Total no. of features for aaindex: 123
|
|
|
|
No. of numerical features: 169
|
|
No. of categorical features: 7
|
|
|
|
PASS: x_features has no target variable
|
|
|
|
No. of columns for x_features: 176
|
|
|
|
-------------------------------------------------------------
|
|
Successfully split data with stratification according to scaling law [COMPLETE data]: 1/sqrt(x_ncols)
|
|
Input features data size: (1132, 176)
|
|
Train data size: (1046, 176)
|
|
Test data size: (86, 176)
|
|
y_train numbers: Counter({0: 764, 1: 282})
|
|
y_train ratio: 2.7092198581560285
|
|
|
|
y_test_numbers: Counter({0: 63, 1: 23})
|
|
y_test ratio: 2.739130434782609
|
|
-------------------------------------------------------------
|
|
|
|
index: 0
|
|
ind: 1
|
|
|
|
Mask count check: True
|
|
|
|
index: 1
|
|
ind: 2
|
|
|
|
Mask count check: True
|
|
|
|
index: 2
|
|
ind: 3
|
|
|
|
Mask count check: True
|
|
Original Data
|
|
Counter({0: 764, 1: 282}) Data dim: (1046, 176)
|
|
|
|
Simple Random OverSampling
|
|
Counter({0: 764, 1: 764})
|
|
(1528, 176)
|
|
|
|
Simple Random UnderSampling
|
|
Counter({0: 282, 1: 282})
|
|
(564, 176)
|
|
|
|
Simple Combined Over and UnderSampling
|
|
Counter({0: 764, 1: 764})
|
|
(1528, 176)
|
|
|
|
SMOTE_NC OverSampling
|
|
Counter({0: 764, 1: 764})
|
|
(1528, 176)
|
|
|
|
#####################################################################
|
|
|
|
Running ML analysis [COMPLETE DATA]: 70/30 split
|
|
Gene name: rpoB
|
|
Drug name: rifampicin
|
|
|
|
Output directory: /home/tanu/git/Data/rifampicin/output/ml/tts_cd_sl/
|
|
|
|
Sanity checks:
|
|
Total input features: 176
|
|
|
|
Training data size: (1046, 176)
|
|
Test data size: (86, 176)
|
|
|
|
Target feature numbers (training data): Counter({0: 764, 1: 282})
|
|
Target features ratio (training data: 2.7092198581560285
|
|
|
|
Target feature numbers (test data): Counter({0: 63, 1: 23})
|
|
Target features ratio (test data): 2.739130434782609
|
|
|
|
#####################################################################
|
|
|
|
|
|
================================================================
|
|
|
|
Strucutral features (n): 37
|
|
These are:
|
|
Common stablity features: ['ligand_distance', 'ligand_affinity_change', 'duet_stability_change', 'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'mcsm_na_affinity', 'mcsm_ppi2_affinity', 'interface_dist']
|
|
FoldX columns: ['electro_rr', 'electro_mm', 'electro_sm', 'electro_ss', 'disulfide_rr', 'disulfide_mm', 'disulfide_sm', 'disulfide_ss', 'hbonds_rr', 'hbonds_mm', 'hbonds_sm', 'hbonds_ss', 'partcov_rr', 'partcov_mm', 'partcov_sm', 'partcov_ss', 'vdwclashes_rr', 'vdwclashes_mm', 'vdwclashes_sm', 'vdwclashes_ss', 'volumetric_rr', 'volumetric_mm', 'volumetric_ss']
|
|
Other struc columns: ['rsa', 'kd_values', 'rd_values']
|
|
================================================================
|
|
|
|
AAindex features (n): 123
|
|
These are:
|
|
['ALTS910101', 'AZAE970101', 'AZAE970102', 'BASU010101', 'BENS940101', 'BENS940102', 'BENS940103', 'BENS940104', 'BETM990101', 'BLAJ010101', 'BONM030101', 'BONM030102', 'BONM030103', 'BONM030104', 'BONM030105', 'BONM030106', 'BRYS930101', 'CROG050101', 'CSEM940101', 'DAYM780301', 'DAYM780302', 'DOSZ010101', 'DOSZ010102', 'DOSZ010103', 'DOSZ010104', 'FEND850101', 'FITW660101', 'GEOD900101', 'GIAG010101', 'GONG920101', 'GRAR740104', 'HENS920101', 'HENS920102', 'HENS920103', 'HENS920104', 'JOHM930101', 'JOND920103', 'JOND940101', 'KANM000101', 'KAPO950101', 'KESO980101', 'KESO980102', 'KOLA920101', 'KOLA930101', 'KOSJ950100_RSA_SST', 'KOSJ950100_SST', 'KOSJ950110_RSA', 'KOSJ950115', 'LEVJ860101', 'LINK010101', 'LIWA970101', 'LUTR910101', 'LUTR910102', 'LUTR910103', 'LUTR910104', 'LUTR910105', 'LUTR910106', 'LUTR910107', 'LUTR910108', 'LUTR910109', 'MCLA710101', 'MCLA720101', 'MEHP950102', 'MICC010101', 'MIRL960101', 'MIYS850102', 'MIYS850103', 'MIYS930101', 'MIYS960101', 'MIYS960102', 'MIYS960103', 'MIYS990106', 'MIYS990107', 'MIYT790101', 'MOHR870101', 'MOOG990101', 'MUET010101', 'MUET020101', 'MUET020102', 'NAOD960101', 'NGPC000101', 'NIEK910101', 'NIEK910102', 'OGAK980101', 'OVEJ920100_RSA', 'OVEJ920101', 'OVEJ920102', 'OVEJ920103', 'PRLA000101', 'PRLA000102', 'QUIB020101', 'QU_C930101', 'QU_C930102', 'QU_C930103', 'RIER950101', 'RISJ880101', 'RUSR970101', 'RUSR970102', 'RUSR970103', 'SIMK990101', 'SIMK990102', 'SIMK990103', 'SIMK990104', 'SIMK990105', 'SKOJ000101', 'SKOJ000102', 'SKOJ970101', 'TANS760101', 'TANS760102', 'THOP960101', 'TOBD000101', 'TOBD000102', 'TUDE900101', 'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101', 'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106']
|
|
================================================================
|
|
|
|
Evolutionary features (n): 3
|
|
These are:
|
|
['consurf_score', 'snap2_score', 'provean_score']
|
|
================================================================
|
|
|
|
Genomic features (n): 6
|
|
These are:
|
|
['maf', 'logorI']
|
|
['lineage_proportion', 'dist_lineage_proportion', 'lineage_count_all', 'lineage_count_unique']
|
|
================================================================
|
|
|
|
Categorical features (n): 7
|
|
These are:
|
|
['ss_class', 'aa_prop_change', 'electrostatics_change', 'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site']
|
|
================================================================
|
|
|
|
|
|
Pass: No. of features match
|
|
|
|
#####################################################################
|
|
|
|
|
|
Model_name: Logistic Regression
|
|
Model func: LogisticRegression(random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'mcsm_ppi2_affinity',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=169)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegression(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.04236889 0.04382324 0.04377055 0.04347372 0.04220366 0.0429163
|
|
0.04302216 0.04379559 0.04375434 0.04313993]
|
|
|
|
mean value: 0.04322683811187744
|
|
|
|
key: score_time
|
|
value: [0.01296234 0.01246238 0.01342416 0.01341486 0.01349759 0.01503015
|
|
0.01333165 0.01332355 0.01344228 0.01342034]
|
|
|
|
mean value: 0.013430929183959961
|
|
|
|
key: test_mcc
|
|
value: [0.58328237 0.74792687 0.52250489 0.45226702 0.61887477 0.61129493
|
|
0.77780413 0.49842509 0.50490733 0.60075141]
|
|
|
|
mean value: 0.5918038807372884
|
|
|
|
key: train_mcc
|
|
value: [0.69754041 0.69562544 0.70053863 0.69754041 0.6972804 0.71633551
|
|
0.67751516 0.70153268 0.69394012 0.68474486]
|
|
|
|
mean value: 0.69625936267133
|
|
|
|
key: test_accuracy
|
|
value: [0.82857143 0.9047619 0.81904762 0.8 0.84761905 0.84761905
|
|
0.91346154 0.81730769 0.79807692 0.84615385]
|
|
|
|
mean value: 0.8422619047619048
|
|
|
|
key: train_accuracy
|
|
value: [0.88204038 0.88204038 0.88416578 0.88204038 0.88310308 0.89054198
|
|
0.87473461 0.88535032 0.88110403 0.87791932]
|
|
|
|
mean value: 0.8823040267502387
|
|
|
|
key: test_fscore
|
|
value: [0.7 0.8 0.64150943 0.57142857 0.72413793 0.71428571
|
|
0.83636364 0.59574468 0.6440678 0.7037037 ]
|
|
|
|
mean value: 0.6931241468239606
|
|
|
|
key: train_fscore
|
|
value: [0.77755511 0.77484787 0.77800407 0.77755511 0.7755102 0.78936605
|
|
0.76209677 0.7768595 0.77419355 0.76673428]
|
|
|
|
mean value: 0.7752722527826301
|
|
|
|
key: test_precision
|
|
value: [0.65625 0.90909091 0.68 0.66666667 0.72413793 0.74074074
|
|
0.85185185 0.73684211 0.61290323 0.73076923]
|
|
|
|
mean value: 0.7309252661223491
|
|
|
|
key: train_precision
|
|
value: [0.79183673 0.79916318 0.80590717 0.79183673 0.80168776 0.81779661
|
|
0.78099174 0.8173913 0.79338843 0.79079498]
|
|
|
|
mean value: 0.7990794644899005
|
|
|
|
key: test_recall
|
|
value: [0.75 0.71428571 0.60714286 0.5 0.72413793 0.68965517
|
|
0.82142857 0.5 0.67857143 0.67857143]
|
|
|
|
mean value: 0.6663793103448276
|
|
|
|
key: train_recall
|
|
value: [0.76377953 0.7519685 0.7519685 0.76377953 0.75098814 0.76284585
|
|
0.74409449 0.74015748 0.75590551 0.74409449]
|
|
|
|
mean value: 0.7529582023590925
|
|
|
|
key: test_roc_auc
|
|
value: [0.80357143 0.84415584 0.75162338 0.70454545 0.80943739 0.79877495
|
|
0.8843985 0.71710526 0.76033835 0.79323308]
|
|
|
|
mean value: 0.7867183633063851
|
|
|
|
key: train_roc_auc
|
|
value: [0.84477186 0.84104975 0.84250536 0.84477186 0.84133709 0.85017292
|
|
0.8335298 0.83955548 0.84161555 0.83571003]
|
|
|
|
mean value: 0.8415019719354448
|
|
|
|
key: test_jcc
|
|
value: [0.53846154 0.66666667 0.47222222 0.4 0.56756757 0.55555556
|
|
0.71875 0.42424242 0.475 0.54285714]
|
|
|
|
mean value: 0.5361323117573118
|
|
|
|
key: train_jcc
|
|
value: [0.63606557 0.63245033 0.63666667 0.63606557 0.63333333 0.65202703
|
|
0.61563518 0.63513514 0.63157895 0.62171053]
|
|
|
|
mean value: 0.6330668293666278
|
|
|
|
MCC on Blind test: 0.51
|
|
|
|
Accuracy on Blind test: 0.81
|
|
|
|
Model_name: Logistic RegressionCV
|
|
Model func: LogisticRegressionCV(random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'mcsm_ppi2_affinity',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=169)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegressionCV(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.1041894 1.01648998 1.09410548 0.97850752 1.03828073 1.01609421
|
|
1.00839067 1.07363677 0.96649861 1.05600548]
|
|
|
|
mean value: 1.0352198839187623
|
|
|
|
key: score_time
|
|
value: [0.01517916 0.0153811 0.01470447 0.01309204 0.01309061 0.01493931
|
|
0.03037095 0.01531339 0.01524711 0.01529431]
|
|
|
|
mean value: 0.01626124382019043
|
|
|
|
key: test_mcc
|
|
value: [0.55397431 0.74792687 0.54365409 0.45226702 0.66651543 0.61129493
|
|
0.7556391 0.55869884 0.57050491 0.64104064]
|
|
|
|
mean value: 0.610151611840308
|
|
|
|
key: train_mcc
|
|
value: [0.74064163 0.74810714 0.74774707 0.74474526 0.7259761 0.76738935
|
|
0.73281749 0.76467178 0.74184063 0.76061874]
|
|
|
|
mean value: 0.7474555184521847
|
|
|
|
key: test_accuracy
|
|
value: [0.81904762 0.9047619 0.82857143 0.8 0.86666667 0.84761905
|
|
0.90384615 0.83653846 0.82692308 0.86538462]
|
|
|
|
mean value: 0.8499358974358975
|
|
|
|
key: train_accuracy
|
|
value: [0.89904357 0.90223167 0.90116897 0.90010627 0.89373007 0.90967056
|
|
0.89596603 0.90870488 0.89915074 0.90658174]
|
|
|
|
mean value: 0.9016354512861819
|
|
|
|
key: test_fscore
|
|
value: [0.6779661 0.8 0.65384615 0.57142857 0.75862069 0.71428571
|
|
0.82142857 0.65306122 0.68965517 0.72 ]
|
|
|
|
mean value: 0.7060292199242688
|
|
|
|
key: train_fscore
|
|
value: [0.80885312 0.81376518 0.81510934 0.812749 0.79757085 0.82828283
|
|
0.80321285 0.82591093 0.81037924 0.824 ]
|
|
|
|
mean value: 0.8139833351400889
|
|
|
|
key: test_precision
|
|
value: [0.64516129 0.90909091 0.70833333 0.66666667 0.75862069 0.74074074
|
|
0.82142857 0.76190476 0.66666667 0.81818182]
|
|
|
|
mean value: 0.7496795447991221
|
|
|
|
key: train_precision
|
|
value: [0.82716049 0.8375 0.82329317 0.82258065 0.81742739 0.84710744
|
|
0.81967213 0.85 0.82186235 0.83739837]
|
|
|
|
mean value: 0.8304001988897277
|
|
|
|
key: test_recall
|
|
value: [0.71428571 0.71428571 0.60714286 0.5 0.75862069 0.68965517
|
|
0.82142857 0.57142857 0.71428571 0.64285714]
|
|
|
|
mean value: 0.6733990147783251
|
|
|
|
key: train_recall
|
|
value: [0.79133858 0.79133858 0.80708661 0.80314961 0.77865613 0.81027668
|
|
0.78740157 0.80314961 0.7992126 0.81102362]
|
|
|
|
mean value: 0.7982633593725685
|
|
|
|
key: test_roc_auc
|
|
value: [0.78571429 0.84415584 0.75811688 0.70454545 0.83325771 0.79877495
|
|
0.87781955 0.75281955 0.79135338 0.79511278]
|
|
|
|
mean value: 0.7941670398566951
|
|
|
|
key: train_roc_auc
|
|
value: [0.86510161 0.86728501 0.87152002 0.86955151 0.85735132 0.87824881
|
|
0.86172404 0.87541201 0.86762956 0.87644204]
|
|
|
|
mean value: 0.8690265926885523
|
|
|
|
key: test_jcc
|
|
value: [0.51282051 0.66666667 0.48571429 0.4 0.61111111 0.55555556
|
|
0.6969697 0.48484848 0.52631579 0.5625 ]
|
|
|
|
mean value: 0.5502502103159997
|
|
|
|
key: train_jcc
|
|
value: [0.67905405 0.68600683 0.68791946 0.68456376 0.66329966 0.70689655
|
|
0.67114094 0.70344828 0.68120805 0.70068027]
|
|
|
|
mean value: 0.6864217857752435
|
|
|
|
MCC on Blind test: 0.6
|
|
|
|
Accuracy on Blind test: 0.85
|
|
|
|
Model_name: Gaussian NB
|
|
Model func: GaussianNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'mcsm_ppi2_affinity',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=169)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01747584 0.01243806 0.01206231 0.01181126 0.01183248 0.0118134
|
|
0.01176119 0.01189089 0.01197147 0.01748323]
|
|
|
|
mean value: 0.0130540132522583
|
|
|
|
key: score_time
|
|
value: [0.01280618 0.00985122 0.00949407 0.00932932 0.00925398 0.00933886
|
|
0.00930572 0.00927496 0.00944495 0.00934601]
|
|
|
|
mean value: 0.009744524955749512
|
|
|
|
key: test_mcc
|
|
value: [0.48970766 0.61790658 0.41115902 0.49452538 0.40771119 0.36373741
|
|
0.48774936 0.28496141 0.48480651 0.51228063]
|
|
|
|
mean value: 0.45545451503746576
|
|
|
|
key: train_mcc
|
|
value: [0.4701369 0.47348408 0.50384535 0.47348408 0.44727079 0.50026056
|
|
0.43890761 0.48852748 0.4724221 0.48520474]
|
|
|
|
mean value: 0.4753543688022016
|
|
|
|
key: test_accuracy
|
|
value: [0.79047619 0.85714286 0.75238095 0.8 0.75238095 0.74285714
|
|
0.78846154 0.73076923 0.76923077 0.81730769]
|
|
|
|
mean value: 0.7801007326007325
|
|
|
|
key: train_accuracy
|
|
value: [0.78958555 0.79064825 0.78746015 0.79064825 0.77789586 0.79914984
|
|
0.77176221 0.79299363 0.78237792 0.79193206]
|
|
|
|
mean value: 0.7874453702638247
|
|
|
|
key: test_fscore
|
|
value: [0.63333333 0.70588235 0.58064516 0.63157895 0.58064516 0.54237288
|
|
0.63333333 0.46153846 0.63636364 0.62745098]
|
|
|
|
mean value: 0.6033144249207096
|
|
|
|
key: train_fscore
|
|
value: [0.61478599 0.61747573 0.64788732 0.61747573 0.60038241 0.63862333
|
|
0.59662289 0.63137996 0.62246777 0.62878788]
|
|
|
|
mean value: 0.6215889010535461
|
|
|
|
key: test_precision
|
|
value: [0.59375 0.7826087 0.52941176 0.62068966 0.54545455 0.53333333
|
|
0.59375 0.5 0.55263158 0.69565217]
|
|
|
|
mean value: 0.5947281747178761
|
|
|
|
key: train_precision
|
|
value: [0.60769231 0.6091954 0.58598726 0.6091954 0.58148148 0.61851852
|
|
0.56989247 0.60727273 0.58477509 0.60583942]
|
|
|
|
mean value: 0.5979850076391097
|
|
|
|
key: test_recall
|
|
value: [0.67857143 0.64285714 0.64285714 0.64285714 0.62068966 0.55172414
|
|
0.67857143 0.42857143 0.75 0.57142857]
|
|
|
|
mean value: 0.6208128078817734
|
|
|
|
key: train_recall
|
|
value: [0.62204724 0.62598425 0.72440945 0.62598425 0.62055336 0.66007905
|
|
0.62598425 0.65748031 0.66535433 0.65354331]
|
|
|
|
mean value: 0.6481419812641996
|
|
|
|
key: test_roc_auc
|
|
value: [0.75487013 0.78896104 0.71753247 0.75 0.71166062 0.68375681
|
|
0.7537594 0.63533835 0.76315789 0.73966165]
|
|
|
|
mean value: 0.7298698352464232
|
|
|
|
key: train_roc_auc
|
|
value: [0.73678781 0.73875632 0.76759046 0.73875632 0.72815459 0.75518487
|
|
0.72578282 0.75025179 0.74546786 0.74828328]
|
|
|
|
mean value: 0.7435016124797015
|
|
|
|
key: test_jcc
|
|
value: [0.46341463 0.54545455 0.40909091 0.46153846 0.40909091 0.37209302
|
|
0.46341463 0.3 0.46666667 0.45714286]
|
|
|
|
mean value: 0.4347906640532846
|
|
|
|
key: train_jcc
|
|
value: [0.44382022 0.44662921 0.47916667 0.44662921 0.42896175 0.46910112
|
|
0.42513369 0.46132597 0.45187166 0.45856354]
|
|
|
|
mean value: 0.4511203040937459
|
|
|
|
MCC on Blind test: 0.31
|
|
|
|
Accuracy on Blind test: 0.72
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'mcsm_ppi2_affinity',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=169)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.02185965 0.01668096 0.01663589 0.01665902 0.01663756 0.01665616
|
|
0.01671576 0.01665163 0.01670074 0.01673222]
|
|
|
|
mean value: 0.017192959785461426
|
|
|
|
key: score_time
|
|
value: [0.01263666 0.01249433 0.01248217 0.0124073 0.01244497 0.01245093
|
|
0.01247048 0.0124166 0.01245403 0.01244998]
|
|
|
|
mean value: 0.012470746040344238
|
|
|
|
key: test_mcc
|
|
value: [0.46169424 0.54365409 0.48290512 0.4833301 0.25771183 0.52610685
|
|
0.50062617 0.37653605 0.48774936 0.6167457 ]
|
|
|
|
mean value: 0.4737059511754335
|
|
|
|
key: train_mcc
|
|
value: [0.51524538 0.49980011 0.5109142 0.54739157 0.52445334 0.50954181
|
|
0.5231752 0.52922993 0.55907902 0.49883421]
|
|
|
|
mean value: 0.5217664774967424
|
|
|
|
key: test_accuracy
|
|
value: [0.8 0.82857143 0.8 0.80952381 0.72380952 0.81904762
|
|
0.80769231 0.77884615 0.78846154 0.85576923]
|
|
|
|
mean value: 0.8011721611721612
|
|
|
|
key: train_accuracy
|
|
value: [0.81296493 0.80871413 0.80977683 0.82571732 0.81934113 0.81083953
|
|
0.8163482 0.82059448 0.83121019 0.80785563]
|
|
|
|
mean value: 0.8163362371421287
|
|
|
|
key: test_fscore
|
|
value: [0.58823529 0.65384615 0.61818182 0.6 0.43137255 0.64150943
|
|
0.62962963 0.48888889 0.63333333 0.70588235]
|
|
|
|
mean value: 0.5990879453920519
|
|
|
|
key: train_fscore
|
|
value: [0.64081633 0.62655602 0.63983903 0.66393443 0.64285714 0.63673469
|
|
0.64621677 0.64718163 0.67080745 0.62680412]
|
|
|
|
mean value: 0.6441747614733685
|
|
|
|
key: test_precision
|
|
value: [0.65217391 0.70833333 0.62962963 0.68181818 0.5 0.70833333
|
|
0.65384615 0.64705882 0.59375 0.7826087 ]
|
|
|
|
mean value: 0.6557552064185697
|
|
|
|
key: train_precision
|
|
value: [0.66525424 0.6622807 0.65432099 0.69230769 0.68609865 0.65822785
|
|
0.67234043 0.68888889 0.70742358 0.65800866]
|
|
|
|
mean value: 0.6745151675029809
|
|
|
|
key: test_recall
|
|
value: [0.53571429 0.60714286 0.60714286 0.53571429 0.37931034 0.5862069
|
|
0.60714286 0.39285714 0.67857143 0.64285714]
|
|
|
|
mean value: 0.5572660098522167
|
|
|
|
key: train_recall
|
|
value: [0.61811024 0.59448819 0.62598425 0.63779528 0.60474308 0.61660079
|
|
0.62204724 0.61023622 0.63779528 0.5984252 ]
|
|
|
|
mean value: 0.6166225763281566
|
|
|
|
key: test_roc_auc
|
|
value: [0.71590909 0.75811688 0.73863636 0.7224026 0.61728675 0.74705082
|
|
0.7443609 0.65695489 0.7537594 0.78853383]
|
|
|
|
mean value: 0.7243011525679403
|
|
|
|
key: train_roc_auc
|
|
value: [0.75155876 0.74120334 0.75185675 0.76649589 0.75149945 0.74943412
|
|
0.75506432 0.75424602 0.77020578 0.74179981]
|
|
|
|
mean value: 0.7533364227228272
|
|
|
|
key: test_jcc
|
|
value: [0.41666667 0.48571429 0.44736842 0.42857143 0.275 0.47222222
|
|
0.45945946 0.32352941 0.46341463 0.54545455]
|
|
|
|
mean value: 0.43174010750522873
|
|
|
|
key: train_jcc
|
|
value: [0.47147147 0.45619335 0.4704142 0.49693252 0.47368421 0.46706587
|
|
0.47734139 0.47839506 0.5046729 0.45645646]
|
|
|
|
mean value: 0.4752627425365646
|
|
|
|
MCC on Blind test: 0.45
|
|
|
|
Accuracy on Blind test: 0.79
|
|
|
|
Model_name: K-Nearest Neighbors
|
|
Model func: KNeighborsClassifier()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'mcsm_ppi2_affinity',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=169)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', KNeighborsClassifier())])
|
|
|
|
key: fit_time
|
|
value: [0.01516509 0.01174545 0.01215386 0.01092553 0.01212525 0.01202726
|
|
0.01277304 0.01204705 0.01243901 0.01097012]
|
|
|
|
mean value: 0.012237167358398438
|
|
|
|
key: score_time
|
|
value: [0.07836294 0.01655698 0.01671529 0.01539016 0.01582408 0.01494884
|
|
0.01525927 0.01764107 0.01760817 0.01412106]
|
|
|
|
mean value: 0.02224278450012207
|
|
|
|
key: test_mcc
|
|
value: [0.41143529 0.32491822 0.36181466 0.22372097 0.40554533 0.336517
|
|
0.49160514 0.26870862 0.49076688 0.41887528]
|
|
|
|
mean value: 0.3733907383002513
|
|
|
|
key: train_mcc
|
|
value: [0.60823432 0.60243382 0.62057875 0.62994145 0.60021779 0.62016187
|
|
0.60601869 0.58023096 0.58284544 0.60519626]
|
|
|
|
mean value: 0.6055859351093665
|
|
|
|
key: test_accuracy
|
|
value: [0.79047619 0.77142857 0.78095238 0.73333333 0.78095238 0.74285714
|
|
0.81730769 0.75 0.80769231 0.78846154]
|
|
|
|
mean value: 0.7763461538461538
|
|
|
|
key: train_accuracy
|
|
value: [0.8544102 0.8522848 0.858661 0.8618491 0.8522848 0.858661
|
|
0.85350318 0.84501062 0.84501062 0.85350318]
|
|
|
|
mean value: 0.8535178504143625
|
|
|
|
key: test_fscore
|
|
value: [0.52173913 0.4 0.43902439 0.36363636 0.53061224 0.50909091
|
|
0.53658537 0.38095238 0.61538462 0.54166667]
|
|
|
|
mean value: 0.4838692067161238
|
|
|
|
key: train_fscore
|
|
value: [0.68505747 0.6819222 0.69565217 0.70454545 0.67447307 0.6984127
|
|
0.68636364 0.66046512 0.67117117 0.68202765]
|
|
|
|
mean value: 0.6840090636431057
|
|
|
|
key: test_precision
|
|
value: [0.66666667 0.66666667 0.69230769 0.5 0.65 0.53846154
|
|
0.84615385 0.57142857 0.66666667 0.65 ]
|
|
|
|
mean value: 0.6448351648351648
|
|
|
|
key: train_precision
|
|
value: [0.82320442 0.81420765 0.83060109 0.83333333 0.82758621 0.81914894
|
|
0.81182796 0.80681818 0.78421053 0.82222222]
|
|
|
|
mean value: 0.817316052680444
|
|
|
|
key: test_recall
|
|
value: [0.42857143 0.28571429 0.32142857 0.28571429 0.44827586 0.48275862
|
|
0.39285714 0.28571429 0.57142857 0.46428571]
|
|
|
|
mean value: 0.39667487684729064
|
|
|
|
key: train_recall
|
|
value: [0.58661417 0.58661417 0.5984252 0.61023622 0.56916996 0.60869565
|
|
0.59448819 0.55905512 0.58661417 0.58267717]
|
|
|
|
mean value: 0.5882590022097041
|
|
|
|
key: test_roc_auc
|
|
value: [0.67532468 0.61688312 0.63474026 0.59090909 0.6780853 0.66243194
|
|
0.68327068 0.60338346 0.73308271 0.68609023]
|
|
|
|
mean value: 0.6564201451905626
|
|
|
|
key: train_roc_auc
|
|
value: [0.77001742 0.76856182 0.77665074 0.78255625 0.76278265 0.77963852
|
|
0.77180805 0.75481826 0.76351057 0.76808277]
|
|
|
|
mean value: 0.7698427047878688
|
|
|
|
key: test_jcc
|
|
value: [0.35294118 0.25 0.28125 0.22222222 0.36111111 0.34146341
|
|
0.36666667 0.23529412 0.44444444 0.37142857]
|
|
|
|
mean value: 0.32268217246248093
|
|
|
|
key: train_jcc
|
|
value: [0.52097902 0.51736111 0.53333333 0.54385965 0.50883392 0.53658537
|
|
0.52249135 0.49305556 0.50508475 0.51748252]
|
|
|
|
mean value: 0.5199066570943169
|
|
|
|
MCC on Blind test: 0.29
|
|
|
|
Accuracy on Blind test: 0.74
|
|
|
|
Model_name: SVM
|
|
Model func: SVC(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'mcsm_ppi2_affinity',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=169)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SVC(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.04720616 0.04593897 0.05266237 0.04500222 0.05246019 0.04854155
|
|
0.0452261 0.05236244 0.04608631 0.04607701]
|
|
|
|
mean value: 0.048156332969665525
|
|
|
|
key: score_time
|
|
value: [0.01932859 0.01811695 0.01889729 0.01908994 0.01902556 0.0187459
|
|
0.01789069 0.01797652 0.01783562 0.01798344]
|
|
|
|
mean value: 0.01848905086517334
|
|
|
|
key: test_mcc
|
|
value: [0.44635733 0.58501794 0.46169424 0.47532708 0.54269344 0.4580397
|
|
0.70087664 0.49378081 0.4575153 0.59366961]
|
|
|
|
mean value: 0.5214972086290752
|
|
|
|
key: train_mcc
|
|
value: [0.68189199 0.66634547 0.69637962 0.69141432 0.68071523 0.68491405
|
|
0.66336854 0.67552174 0.69592264 0.67039081]
|
|
|
|
mean value: 0.6806864420729674
|
|
|
|
key: test_accuracy
|
|
value: [0.78095238 0.84761905 0.8 0.80952381 0.81904762 0.78095238
|
|
0.88461538 0.81730769 0.77884615 0.84615385]
|
|
|
|
mean value: 0.8165018315018315
|
|
|
|
key: train_accuracy
|
|
value: [0.87566419 0.86928799 0.88310308 0.87991498 0.87672689 0.87672689
|
|
0.86836518 0.87473461 0.87898089 0.87154989]
|
|
|
|
mean value: 0.8755054590251596
|
|
|
|
key: test_fscore
|
|
value: [0.59649123 0.66666667 0.58823529 0.58333333 0.66666667 0.61016949
|
|
0.77777778 0.57777778 0.61016949 0.69230769]
|
|
|
|
mean value: 0.6369595419768584
|
|
|
|
key: train_fscore
|
|
value: [0.76646707 0.7554672 0.77366255 0.77263581 0.76326531 0.7689243
|
|
0.75298805 0.75918367 0.77906977 0.75751503]
|
|
|
|
mean value: 0.7649178756708441
|
|
|
|
key: test_precision
|
|
value: [0.5862069 0.8 0.65217391 0.7 0.67857143 0.6
|
|
0.80769231 0.76470588 0.58064516 0.75 ]
|
|
|
|
mean value: 0.6919995589502203
|
|
|
|
key: train_precision
|
|
value: [0.77732794 0.76305221 0.81034483 0.79012346 0.78902954 0.7751004
|
|
0.76209677 0.78813559 0.76717557 0.77142857]
|
|
|
|
mean value: 0.7793814877267291
|
|
|
|
key: test_recall
|
|
value: [0.60714286 0.57142857 0.53571429 0.5 0.65517241 0.62068966
|
|
0.75 0.46428571 0.64285714 0.64285714]
|
|
|
|
mean value: 0.5990147783251232
|
|
|
|
key: train_recall
|
|
value: [0.75590551 0.7480315 0.74015748 0.75590551 0.73913043 0.76284585
|
|
0.74409449 0.73228346 0.79133858 0.74409449]
|
|
|
|
mean value: 0.7513787308207027
|
|
|
|
key: test_roc_auc
|
|
value: [0.72564935 0.75974026 0.71590909 0.71103896 0.76837568 0.73139746
|
|
0.84210526 0.70582707 0.73590226 0.78195489]
|
|
|
|
mean value: 0.7477900275767789
|
|
|
|
key: train_roc_auc
|
|
value: [0.83792364 0.83107543 0.83805545 0.84083485 0.83322801 0.84072525
|
|
0.82916934 0.82980452 0.8513379 0.83134957]
|
|
|
|
mean value: 0.8363503958426319
|
|
|
|
key: test_jcc
|
|
value: [0.425 0.5 0.41666667 0.41176471 0.5 0.43902439
|
|
0.63636364 0.40625 0.43902439 0.52941176]
|
|
|
|
mean value: 0.4703505554106343
|
|
|
|
key: train_jcc
|
|
value: [0.62135922 0.60702875 0.63087248 0.6295082 0.61716172 0.62459547
|
|
0.60383387 0.61184211 0.63809524 0.60967742]
|
|
|
|
mean value: 0.619397447119258
|
|
|
|
MCC on Blind test: 0.43
|
|
|
|
Accuracy on Blind test: 0.78
|
|
|
|
Model_name: MLP
|
|
Model func: MLPClassifier(max_iter=500, random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'mcsm_ppi2_affinity',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=169)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MLPClassifier(max_iter=500, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [2.22862315 2.78592253 3.49585414 2.10523105 2.59978676 3.39329648
|
|
2.20125008 3.0786252 2.87909365 3.44558978]
|
|
|
|
mean value: 2.82132728099823
|
|
|
|
key: score_time
|
|
value: [0.01277804 0.01293755 0.02243972 0.01297092 0.01325655 0.01450968
|
|
0.0154767 0.01281786 0.01280737 0.0148108 ]
|
|
|
|
mean value: 0.014480519294738769
|
|
|
|
key: test_mcc
|
|
value: [0.64128534 0.75176462 0.41143529 0.49236596 0.56255844 0.45486506
|
|
0.61269947 0.62946984 0.52265611 0.63773267]
|
|
|
|
mean value: 0.5716832805218365
|
|
|
|
key: train_mcc
|
|
value: [0.86553093 0.90693263 0.93760549 0.8245254 0.85701575 0.964908
|
|
0.85218632 0.90040276 0.89116644 0.93504603]
|
|
|
|
mean value: 0.8935319759227061
|
|
|
|
key: test_accuracy
|
|
value: [0.84761905 0.9047619 0.79047619 0.80952381 0.82857143 0.79047619
|
|
0.85576923 0.85576923 0.80769231 0.85576923]
|
|
|
|
mean value: 0.8346428571428571
|
|
|
|
key: train_accuracy
|
|
value: [0.94686504 0.96280553 0.97555792 0.93092455 0.94473964 0.98618491
|
|
0.94267516 0.96072187 0.95753715 0.97452229]
|
|
|
|
mean value: 0.958253405262956
|
|
|
|
key: test_fscore
|
|
value: [0.74193548 0.81481481 0.52173913 0.61538462 0.67857143 0.59259259
|
|
0.69387755 0.72727273 0.65517241 0.73684211]
|
|
|
|
mean value: 0.6778202863018599
|
|
|
|
key: train_fscore
|
|
value: [0.90196078 0.93230174 0.95334686 0.87179487 0.89211618 0.97435897
|
|
0.88510638 0.92730845 0.91967871 0.95238095]
|
|
|
|
mean value: 0.9210353907992582
|
|
|
|
key: test_precision
|
|
value: [0.67647059 0.84615385 0.66666667 0.66666667 0.7037037 0.64
|
|
0.80952381 0.74074074 0.63333333 0.72413793]
|
|
|
|
mean value: 0.7107397286058543
|
|
|
|
key: train_precision
|
|
value: [0.8984375 0.91634981 0.9832636 0.87351779 0.93886463 0.97244094
|
|
0.96296296 0.9254902 0.93852459 0.96 ]
|
|
|
|
mean value: 0.9369852017681736
|
|
|
|
key: test_recall
|
|
value: [0.82142857 0.78571429 0.42857143 0.57142857 0.65517241 0.55172414
|
|
0.60714286 0.71428571 0.67857143 0.75 ]
|
|
|
|
mean value: 0.6564039408866995
|
|
|
|
key: train_recall
|
|
value: [0.90551181 0.9488189 0.92519685 0.87007874 0.84980237 0.97628458
|
|
0.81889764 0.92913386 0.9015748 0.94488189]
|
|
|
|
mean value: 0.9070181444710715
|
|
|
|
key: test_roc_auc
|
|
value: [0.83928571 0.86688312 0.67532468 0.73376623 0.77495463 0.71665154
|
|
0.77725564 0.81109023 0.76691729 0.82236842]
|
|
|
|
mean value: 0.7784497489806019
|
|
|
|
key: train_roc_auc
|
|
value: [0.93383305 0.9583978 0.95968722 0.9117497 0.91472677 0.98305508
|
|
0.90363487 0.95075879 0.93988624 0.9651735 ]
|
|
|
|
mean value: 0.9420903025594923
|
|
|
|
key: test_jcc
|
|
value: [0.58974359 0.6875 0.35294118 0.44444444 0.51351351 0.42105263
|
|
0.53125 0.57142857 0.48717949 0.58333333]
|
|
|
|
mean value: 0.5182386747692476
|
|
|
|
key: train_jcc
|
|
value: [0.82142857 0.87318841 0.91085271 0.77272727 0.80524345 0.95
|
|
0.79389313 0.86446886 0.85130112 0.90909091]
|
|
|
|
mean value: 0.8552194427396526
|
|
|
|
MCC on Blind test: 0.19
|
|
|
|
Accuracy on Blind test: 0.72
|
|
|
|
Model_name: Decision Tree
|
|
Model func: DecisionTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'mcsm_ppi2_affinity',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=169)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', DecisionTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.05920386 0.04278469 0.04925251 0.04157639 0.04870343 0.04426932
|
|
0.04905319 0.04765654 0.04861045 0.04428649]
|
|
|
|
mean value: 0.047539687156677245
|
|
|
|
key: score_time
|
|
value: [0.00967717 0.00915051 0.00912809 0.00910258 0.00926304 0.00927639
|
|
0.0093503 0.00934386 0.00939941 0.00914288]
|
|
|
|
mean value: 0.00928342342376709
|
|
|
|
key: test_mcc
|
|
value: [0.48970766 0.68719761 0.63006972 0.80519481 0.72090911 0.63902955
|
|
0.62241681 0.72259164 0.6470834 0.70087664]
|
|
|
|
mean value: 0.6665076947650287
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.79047619 0.87619048 0.84761905 0.92380952 0.88571429 0.85714286
|
|
0.85576923 0.88461538 0.85576923 0.88461538]
|
|
|
|
mean value: 0.8661721611721611
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.63333333 0.77192982 0.73333333 0.85714286 0.8 0.73684211
|
|
0.71698113 0.8 0.74576271 0.77777778]
|
|
|
|
mean value: 0.7573103075351741
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.59375 0.75862069 0.6875 0.85714286 0.77419355 0.75
|
|
0.76 0.75 0.70967742 0.80769231]
|
|
|
|
mean value: 0.7448576822232272
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.67857143 0.78571429 0.78571429 0.85714286 0.82758621 0.72413793
|
|
0.67857143 0.85714286 0.78571429 0.75 ]
|
|
|
|
mean value: 0.7730295566502463
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.75487013 0.8474026 0.82792208 0.9025974 0.86774047 0.81601633
|
|
0.79981203 0.87593985 0.83364662 0.84210526]
|
|
|
|
mean value: 0.8368052772998327
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.46341463 0.62857143 0.57894737 0.75 0.66666667 0.58333333
|
|
0.55882353 0.66666667 0.59459459 0.63636364]
|
|
|
|
mean value: 0.6127381858175485
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.69
|
|
|
|
Accuracy on Blind test: 0.87
|
|
|
|
Model_name: Extra Trees
|
|
Model func: ExtraTreesClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'mcsm_ppi2_affinity',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=169)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreesClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.17840528 0.17516661 0.17449522 0.17805219 0.17762661 0.17407775
|
|
0.17794991 0.17574143 0.17979455 0.18026257]
|
|
|
|
mean value: 0.17715721130371093
|
|
|
|
key: score_time
|
|
value: [0.01917481 0.0194056 0.01928663 0.01901174 0.01909161 0.02058864
|
|
0.01914811 0.02079082 0.02084541 0.01970816]
|
|
|
|
mean value: 0.01970515251159668
|
|
|
|
key: test_mcc
|
|
value: [0.63073322 0.63986337 0.46169424 0.46857908 0.62409133 0.48456568
|
|
0.66672447 0.53488104 0.57050491 0.55452488]
|
|
|
|
mean value: 0.5636162225886381
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.85714286 0.86666667 0.8 0.80952381 0.85714286 0.8
|
|
0.875 0.82692308 0.82692308 0.83653846]
|
|
|
|
mean value: 0.8355860805860806
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.72727273 0.70833333 0.58823529 0.56521739 0.70588235 0.61818182
|
|
0.72340426 0.64 0.68965517 0.63829787]
|
|
|
|
mean value: 0.6604480217224418
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.74074074 0.85 0.65217391 0.72222222 0.81818182 0.65384615
|
|
0.89473684 0.72727273 0.66666667 0.78947368]
|
|
|
|
mean value: 0.7515314768289597
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.71428571 0.60714286 0.53571429 0.46428571 0.62068966 0.5862069
|
|
0.60714286 0.57142857 0.71428571 0.53571429]
|
|
|
|
mean value: 0.5956896551724138
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.81168831 0.78409091 0.71590909 0.69967532 0.78402904 0.73389292
|
|
0.79041353 0.7462406 0.79135338 0.74154135]
|
|
|
|
mean value: 0.7598834468616682
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.57142857 0.5483871 0.41666667 0.39393939 0.54545455 0.44736842
|
|
0.56666667 0.47058824 0.52631579 0.46875 ]
|
|
|
|
mean value: 0.4955565386750471
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.44
|
|
|
|
Accuracy on Blind test: 0.79
|
|
|
|
Model_name: Extra Tree
|
|
Model func: ExtraTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'mcsm_ppi2_affinity',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=169)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01375198 0.01371026 0.01383424 0.01265526 0.01369977 0.01378274
|
|
0.01375723 0.01389861 0.01403022 0.01332092]
|
|
|
|
mean value: 0.013644123077392578
|
|
|
|
key: score_time
|
|
value: [0.00919485 0.00999022 0.01008248 0.00961518 0.01013923 0.01005077
|
|
0.00919127 0.01002955 0.01016307 0.00959158]
|
|
|
|
mean value: 0.009804821014404297
|
|
|
|
key: test_mcc
|
|
value: [0.4719399 0.46169424 0.38138504 0.29074308 0.43743823 0.2700595
|
|
0.36876733 0.26794721 0.34768516 0.45056356]
|
|
|
|
mean value: 0.37482232502821944
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.8 0.8 0.75238095 0.74285714 0.76190476 0.6952381
|
|
0.75961538 0.70192308 0.74038462 0.78846154]
|
|
|
|
mean value: 0.7542765567765568
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.60377358 0.58823529 0.55172414 0.44897959 0.6031746 0.48387097
|
|
0.52830189 0.47457627 0.52631579 0.59259259]
|
|
|
|
mean value: 0.5401544719752785
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.64 0.65217391 0.53333333 0.52380952 0.55882353 0.45454545
|
|
0.56 0.4516129 0.51724138 0.61538462]
|
|
|
|
mean value: 0.5506924652064321
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.57142857 0.53571429 0.57142857 0.39285714 0.65517241 0.51724138
|
|
0.5 0.5 0.53571429 0.57142857]
|
|
|
|
mean value: 0.5350985221674877
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.72727273 0.71590909 0.69480519 0.63149351 0.728902 0.64019964
|
|
0.67763158 0.63815789 0.67575188 0.71992481]
|
|
|
|
mean value: 0.6850048318287882
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.43243243 0.41666667 0.38095238 0.28947368 0.43181818 0.31914894
|
|
0.35897436 0.31111111 0.35714286 0.42105263]
|
|
|
|
mean value: 0.37187732410576757
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.12
|
|
|
|
Accuracy on Blind test: 0.67
|
|
|
|
Model_name: Random Forest
|
|
Model func: RandomForestClassifier(n_estimators=1000, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'mcsm_ppi2_affinity',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=169)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(n_estimators=1000, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [2.96372914 2.92686152 2.83464742 2.84536767 2.8680954 2.82494855
|
|
2.91374612 2.87321639 2.98415089 2.86491728]
|
|
|
|
mean value: 2.8899680376052856
|
|
|
|
key: score_time
|
|
value: [0.10747576 0.10349965 0.10398197 0.1010704 0.09961128 0.10258842
|
|
0.10371041 0.10676098 0.0987711 0.09880328]
|
|
|
|
mean value: 0.1026273250579834
|
|
|
|
key: test_mcc
|
|
value: [0.63902955 0.72147357 0.68000926 0.74955423 0.80586246 0.5768334
|
|
0.80100188 0.65081403 0.7556391 0.74802371]
|
|
|
|
mean value: 0.7128241193614874
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.85714286 0.8952381 0.87619048 0.9047619 0.92380952 0.83809524
|
|
0.92307692 0.86538462 0.90384615 0.90384615]
|
|
|
|
mean value: 0.8891391941391942
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.73684211 0.7755102 0.76363636 0.79166667 0.84615385 0.67924528
|
|
0.85185185 0.74074074 0.82142857 0.80769231]
|
|
|
|
mean value: 0.7814767940534006
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.72413793 0.9047619 0.77777778 0.95 0.95652174 0.75
|
|
0.88461538 0.76923077 0.82142857 0.875 ]
|
|
|
|
mean value: 0.8413474077979325
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.75 0.67857143 0.75 0.67857143 0.75862069 0.62068966
|
|
0.82142857 0.71428571 0.82142857 0.75 ]
|
|
|
|
mean value: 0.7343596059113301
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.82305195 0.8262987 0.83603896 0.83279221 0.8727314 0.77087114
|
|
0.89097744 0.81766917 0.87781955 0.85526316]
|
|
|
|
mean value: 0.8403513682324935
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.58333333 0.63333333 0.61764706 0.65517241 0.73333333 0.51428571
|
|
0.74193548 0.58823529 0.6969697 0.67741935]
|
|
|
|
mean value: 0.6441665016699368
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.6
|
|
|
|
Accuracy on Blind test: 0.84
|
|
|
|
Model_name: Random Forest2
|
|
Model func: RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'mcsm_ppi2_affinity',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC0...05', 'ZHAC000106'],
|
|
dtype='object', length=169)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10,
|
|
oob_score=True, random_state=42))])
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
|
|
key: fit_time
|
|
value: [2.05575657 1.243366 1.28098345 1.19924212 1.17100382 1.13483191
|
|
1.20760489 1.19215012 1.19190717 1.22347903]
|
|
|
|
mean value: 1.2900325059890747
|
|
|
|
key: score_time
|
|
value: [0.2919271 0.28175187 0.25639915 0.2832191 0.28457761 0.26847863
|
|
0.28040123 0.30023789 0.2938211 0.13949108]
|
|
|
|
mean value: 0.2680304765701294
|
|
|
|
key: test_mcc
|
|
value: [0.65909091 0.6947088 0.70779221 0.67040356 0.77973963 0.5832648
|
|
0.7746065 0.64512098 0.70087664 0.72121077]
|
|
|
|
mean value: 0.6936814791261937
|
|
|
|
key: train_mcc
|
|
value: [0.92659451 0.93211188 0.94304801 0.93210595 0.93190315 0.91818138
|
|
0.92936458 0.92389796 0.94030922 0.9321045 ]
|
|
|
|
mean value: 0.9309621133073288
|
|
|
|
key: test_accuracy
|
|
value: [0.86666667 0.88571429 0.88571429 0.87619048 0.91428571 0.83809524
|
|
0.91346154 0.86538462 0.88461538 0.89423077]
|
|
|
|
mean value: 0.8824358974358975
|
|
|
|
key: train_accuracy
|
|
value: [0.97130712 0.97343252 0.97768332 0.97343252 0.97343252 0.96811902
|
|
0.97239915 0.97027601 0.97664544 0.97346072]
|
|
|
|
mean value: 0.9730188330163285
|
|
|
|
key: test_fscore
|
|
value: [0.75 0.75 0.78571429 0.71111111 0.83018868 0.69090909
|
|
0.83018868 0.73076923 0.77777778 0.78431373]
|
|
|
|
mean value: 0.7640972580262259
|
|
|
|
key: train_fscore
|
|
value: [0.94545455 0.9498998 0.95757576 0.94929006 0.94929006 0.93852459
|
|
0.94758065 0.94308943 0.95582329 0.94949495]
|
|
|
|
mean value: 0.9486023133220529
|
|
|
|
key: test_precision
|
|
value: [0.75 0.9 0.78571429 0.94117647 0.91666667 0.73076923
|
|
0.88 0.79166667 0.80769231 0.86956522]
|
|
|
|
mean value: 0.8373250845488697
|
|
|
|
key: train_precision
|
|
value: [0.97095436 0.96734694 0.98340249 0.9790795 0.975 0.97446809
|
|
0.97107438 0.97478992 0.97540984 0.97510373]
|
|
|
|
mean value: 0.9746629234899955
|
|
|
|
key: test_recall
|
|
value: [0.75 0.64285714 0.78571429 0.57142857 0.75862069 0.65517241
|
|
0.78571429 0.67857143 0.75 0.71428571]
|
|
|
|
mean value: 0.7092364532019705
|
|
|
|
key: train_recall
|
|
value: [0.92125984 0.93307087 0.93307087 0.92125984 0.92490119 0.90513834
|
|
0.92519685 0.91338583 0.93700787 0.92519685]
|
|
|
|
mean value: 0.9239488344589337
|
|
|
|
key: test_roc_auc
|
|
value: [0.82954545 0.80844156 0.8538961 0.77922078 0.86615245 0.78153358
|
|
0.8731203 0.80639098 0.84210526 0.83740602]
|
|
|
|
mean value: 0.8277812477903221
|
|
|
|
key: train_roc_auc
|
|
value: [0.95553531 0.96071302 0.96362422 0.95699091 0.95809013 0.9482087
|
|
0.95751122 0.95233245 0.96414347 0.95823796]
|
|
|
|
mean value: 0.9575387388502652
|
|
|
|
key: test_jcc
|
|
value: [0.6 0.6 0.64705882 0.55172414 0.70967742 0.52777778
|
|
0.70967742 0.57575758 0.63636364 0.64516129]
|
|
|
|
mean value: 0.6203198080391694
|
|
|
|
key: train_jcc
|
|
value: [0.89655172 0.90458015 0.91860465 0.9034749 0.9034749 0.88416988
|
|
0.90038314 0.89230769 0.91538462 0.90384615]
|
|
|
|
mean value: 0.9022777822393082
|
|
|
|
MCC on Blind test: 0.62
|
|
|
|
Accuracy on Blind test: 0.85
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'mcsm_ppi2_affinity',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=169)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.0289228 0.01629043 0.01681805 0.0168488 0.01682401 0.01687074
|
|
0.01683521 0.01689267 0.0168004 0.01689029]
|
|
|
|
mean value: 0.01799933910369873
|
|
|
|
key: score_time
|
|
value: [0.01274776 0.01229644 0.01261449 0.01246381 0.0125525 0.01247954
|
|
0.01250553 0.01267982 0.01251364 0.01251364]
|
|
|
|
mean value: 0.012536716461181641
|
|
|
|
key: test_mcc
|
|
value: [0.46169424 0.54365409 0.48290512 0.4833301 0.25771183 0.52610685
|
|
0.50062617 0.37653605 0.48774936 0.6167457 ]
|
|
|
|
mean value: 0.4737059511754335
|
|
|
|
key: train_mcc
|
|
value: [0.51524538 0.49980011 0.5109142 0.54739157 0.52445334 0.50954181
|
|
0.5231752 0.52922993 0.55907902 0.49883421]
|
|
|
|
mean value: 0.5217664774967424
|
|
|
|
key: test_accuracy
|
|
value: [0.8 0.82857143 0.8 0.80952381 0.72380952 0.81904762
|
|
0.80769231 0.77884615 0.78846154 0.85576923]
|
|
|
|
mean value: 0.8011721611721612
|
|
|
|
key: train_accuracy
|
|
value: [0.81296493 0.80871413 0.80977683 0.82571732 0.81934113 0.81083953
|
|
0.8163482 0.82059448 0.83121019 0.80785563]
|
|
|
|
mean value: 0.8163362371421287
|
|
|
|
key: test_fscore
|
|
value: [0.58823529 0.65384615 0.61818182 0.6 0.43137255 0.64150943
|
|
0.62962963 0.48888889 0.63333333 0.70588235]
|
|
|
|
mean value: 0.5990879453920519
|
|
|
|
key: train_fscore
|
|
value: [0.64081633 0.62655602 0.63983903 0.66393443 0.64285714 0.63673469
|
|
0.64621677 0.64718163 0.67080745 0.62680412]
|
|
|
|
mean value: 0.6441747614733685
|
|
|
|
key: test_precision
|
|
value: [0.65217391 0.70833333 0.62962963 0.68181818 0.5 0.70833333
|
|
0.65384615 0.64705882 0.59375 0.7826087 ]
|
|
|
|
mean value: 0.6557552064185697
|
|
|
|
key: train_precision
|
|
value: [0.66525424 0.6622807 0.65432099 0.69230769 0.68609865 0.65822785
|
|
0.67234043 0.68888889 0.70742358 0.65800866]
|
|
|
|
mean value: 0.6745151675029809
|
|
|
|
key: test_recall
|
|
value: [0.53571429 0.60714286 0.60714286 0.53571429 0.37931034 0.5862069
|
|
0.60714286 0.39285714 0.67857143 0.64285714]
|
|
|
|
mean value: 0.5572660098522167
|
|
|
|
key: train_recall
|
|
value: [0.61811024 0.59448819 0.62598425 0.63779528 0.60474308 0.61660079
|
|
0.62204724 0.61023622 0.63779528 0.5984252 ]
|
|
|
|
mean value: 0.6166225763281566
|
|
|
|
key: test_roc_auc
|
|
value: [0.71590909 0.75811688 0.73863636 0.7224026 0.61728675 0.74705082
|
|
0.7443609 0.65695489 0.7537594 0.78853383]
|
|
|
|
mean value: 0.7243011525679403
|
|
|
|
key: train_roc_auc
|
|
value: [0.75155876 0.74120334 0.75185675 0.76649589 0.75149945 0.74943412
|
|
0.75506432 0.75424602 0.77020578 0.74179981]
|
|
|
|
mean value: 0.7533364227228272
|
|
|
|
key: test_jcc
|
|
value: [0.41666667 0.48571429 0.44736842 0.42857143 0.275 0.47222222
|
|
0.45945946 0.32352941 0.46341463 0.54545455]
|
|
|
|
mean value: 0.43174010750522873
|
|
|
|
key: train_jcc
|
|
value: [0.47147147 0.45619335 0.4704142 0.49693252 0.47368421 0.46706587
|
|
0.47734139 0.47839506 0.5046729 0.45645646]
|
|
|
|
mean value: 0.4752627425365646
|
|
|
|
MCC on Blind test: 0.45
|
|
|
|
Accuracy on Blind test: 0.79
|
|
|
|
Model_name: XGBoost
|
|
Model func: XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'mcsm_ppi2_affinity',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC0...
|
|
interaction_constraints=None, learning_rate=None,
|
|
max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan,
|
|
monotone_constraints=None, n_estimators=100,
|
|
n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None,
|
|
reg_lambda=None, scale_pos_weight=None,
|
|
subsample=None, tree_method=None,
|
|
use_label_encoder=False,
|
|
validate_parameters=None, verbosity=0))])
|
|
|
|
key: fit_time
|
|
value: [0.17531443 0.13732934 0.15173054 0.30316639 0.1397016 0.13541865
|
|
0.13615513 0.1488471 0.13771081 0.13897204]
|
|
|
|
mean value: 0.16043460369110107
|
|
|
|
key: score_time
|
|
value: [0.0116384 0.01145339 0.01149249 0.01155233 0.01201534 0.01170301
|
|
0.01146698 0.01148391 0.01156187 0.01159525]
|
|
|
|
mean value: 0.011596298217773438
|
|
|
|
key: test_mcc
|
|
value: [0.7900429 0.82783735 0.76277007 0.8016605 0.8824 0.72090911
|
|
0.82724889 0.8575977 0.78275643 0.70676692]
|
|
|
|
mean value: 0.795998987860649
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.91428571 0.93333333 0.9047619 0.92380952 0.95238095 0.88571429
|
|
0.93269231 0.94230769 0.91346154 0.88461538]
|
|
|
|
mean value: 0.9187362637362637
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.84745763 0.87272727 0.82758621 0.85185185 0.91525424 0.8
|
|
0.87272727 0.89655172 0.84210526 0.78571429]
|
|
|
|
mean value: 0.8511975741619839
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.80645161 0.88888889 0.8 0.88461538 0.9 0.77419355
|
|
0.88888889 0.86666667 0.82758621 0.78571429]
|
|
|
|
mean value: 0.8423005482960989
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.89285714 0.85714286 0.85714286 0.82142857 0.93103448 0.82758621
|
|
0.85714286 0.92857143 0.85714286 0.78571429]
|
|
|
|
mean value: 0.861576354679803
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.90746753 0.90909091 0.88961039 0.89123377 0.9457804 0.86774047
|
|
0.90883459 0.93796992 0.89567669 0.85338346]
|
|
|
|
mean value: 0.9006788130200108
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.73529412 0.77419355 0.70588235 0.74193548 0.84375 0.66666667
|
|
0.77419355 0.8125 0.72727273 0.64705882]
|
|
|
|
mean value: 0.7428747268702203
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.65
|
|
|
|
Accuracy on Blind test: 0.86
|
|
|
|
Model_name: LDA
|
|
Model func: LinearDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'mcsm_ppi2_affinity',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=169)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LinearDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.06972575 0.0885663 0.06222439 0.10551476 0.08810663 0.08039427
|
|
0.0723412 0.08061385 0.08970308 0.06838965]
|
|
|
|
mean value: 0.08055799007415772
|
|
|
|
key: score_time
|
|
value: [0.02522445 0.01254392 0.01250386 0.01611376 0.02047777 0.01259971
|
|
0.02132535 0.01723981 0.01259971 0.01989269]
|
|
|
|
mean value: 0.017052102088928222
|
|
|
|
key: test_mcc
|
|
value: [0.59526008 0.66997736 0.52250489 0.51376502 0.5908615 0.56279299
|
|
0.71405131 0.65789474 0.50122741 0.56451316]
|
|
|
|
mean value: 0.5892848455773888
|
|
|
|
key: train_mcc
|
|
value: [0.74418298 0.74592766 0.73990708 0.72430323 0.73986999 0.76641706
|
|
0.72812019 0.72980501 0.74784468 0.74794191]
|
|
|
|
mean value: 0.7414319796418983
|
|
|
|
key: test_accuracy
|
|
value: [0.82857143 0.87619048 0.81904762 0.81904762 0.83809524 0.81904762
|
|
0.88461538 0.86538462 0.78846154 0.83653846]
|
|
|
|
mean value: 0.8375
|
|
|
|
key: train_accuracy
|
|
value: [0.90010627 0.90010627 0.89798087 0.89160468 0.89798087 0.90860786
|
|
0.89278132 0.89490446 0.90127389 0.90021231]
|
|
|
|
mean value: 0.8985558797051517
|
|
|
|
key: test_fscore
|
|
value: [0.70967742 0.74509804 0.64150943 0.62745098 0.70175439 0.68852459
|
|
0.79310345 0.75 0.64516129 0.66666667]
|
|
|
|
mean value: 0.6968946254318902
|
|
|
|
key: train_fscore
|
|
value: [0.812 0.81422925 0.80952381 0.79841897 0.80952381 0.82868526
|
|
0.80157171 0.80080483 0.81510934 0.81640625]
|
|
|
|
mean value: 0.8106273231499652
|
|
|
|
key: test_precision
|
|
value: [0.64705882 0.82608696 0.68 0.69565217 0.71428571 0.65625
|
|
0.76666667 0.75 0.58823529 0.73913043]
|
|
|
|
mean value: 0.7063366063816832
|
|
|
|
key: train_precision
|
|
value: [0.82520325 0.81746032 0.816 0.8015873 0.812749 0.83534137
|
|
0.8 0.81893004 0.82329317 0.81007752]
|
|
|
|
mean value: 0.8160641973748922
|
|
|
|
key: test_recall
|
|
value: [0.78571429 0.67857143 0.60714286 0.57142857 0.68965517 0.72413793
|
|
0.82142857 0.75 0.71428571 0.60714286]
|
|
|
|
mean value: 0.6949507389162561
|
|
|
|
key: train_recall
|
|
value: [0.7992126 0.81102362 0.80314961 0.79527559 0.80632411 0.82213439
|
|
0.80314961 0.78346457 0.80708661 0.82283465]
|
|
|
|
mean value: 0.8053655348417417
|
|
|
|
key: test_roc_auc
|
|
value: [0.81493506 0.81331169 0.75162338 0.74025974 0.79219601 0.78970054
|
|
0.86466165 0.82894737 0.76503759 0.76409774]
|
|
|
|
mean value: 0.7924770782756263
|
|
|
|
key: train_roc_auc
|
|
value: [0.86831081 0.87203292 0.86809591 0.86124769 0.86900508 0.88127068
|
|
0.86451085 0.85975554 0.87156656 0.87580686]
|
|
|
|
mean value: 0.8691602901858415
|
|
|
|
key: test_jcc
|
|
value: [0.55 0.59375 0.47222222 0.45714286 0.54054054 0.525
|
|
0.65714286 0.6 0.47619048 0.5 ]
|
|
|
|
mean value: 0.5371988953238953
|
|
|
|
key: train_jcc
|
|
value: [0.68350168 0.68666667 0.68 0.66447368 0.68 0.70748299
|
|
0.66885246 0.66778523 0.68791946 0.68976898]
|
|
|
|
mean value: 0.6816451161476815
|
|
|
|
MCC on Blind test: 0.57
|
|
|
|
Accuracy on Blind test: 0.84
|
|
|
|
Model_name: Multinomial
|
|
Model func: MultinomialNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'mcsm_ppi2_affinity',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=169)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MultinomialNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01353955 0.01583195 0.0162034 0.01615739 0.01609588 0.01603723
|
|
0.01614356 0.01637602 0.01617908 0.01602554]
|
|
|
|
mean value: 0.015858960151672364
|
|
|
|
key: score_time
|
|
value: [0.01226473 0.0206635 0.01224327 0.01228356 0.01226664 0.01225877
|
|
0.0122447 0.0122242 0.01221085 0.01222301]
|
|
|
|
mean value: 0.013088321685791016
|
|
|
|
key: test_mcc
|
|
value: [0.58328237 0.64623033 0.51298701 0.53218116 0.55234199 0.47595281
|
|
0.53058032 0.57168691 0.50490733 0.45069757]
|
|
|
|
mean value: 0.5360847797119024
|
|
|
|
key: train_mcc
|
|
value: [0.53279301 0.53498923 0.53826168 0.54587092 0.54476219 0.54148722
|
|
0.53628245 0.54060694 0.53624637 0.54393873]
|
|
|
|
mean value: 0.5395238731507676
|
|
|
|
key: test_accuracy
|
|
value: [0.82857143 0.86666667 0.80952381 0.81904762 0.81904762 0.79047619
|
|
0.81730769 0.83653846 0.79807692 0.79807692]
|
|
|
|
mean value: 0.8183333333333334
|
|
|
|
key: train_accuracy
|
|
value: [0.81827843 0.81827843 0.81934113 0.82252922 0.82359192 0.82040383
|
|
0.81847134 0.82059448 0.81953291 0.82059448]
|
|
|
|
mean value: 0.820161616024873
|
|
|
|
key: test_fscore
|
|
value: [0.7 0.73076923 0.64285714 0.65454545 0.6779661 0.62068966
|
|
0.65454545 0.67924528 0.6440678 0.57142857]
|
|
|
|
mean value: 0.6576114690642221
|
|
|
|
key: train_fscore
|
|
value: [0.65593561 0.65868263 0.66135458 0.66666667 0.66396761 0.6640159
|
|
0.66003976 0.66267465 0.65863454 0.66666667]
|
|
|
|
mean value: 0.6618638629610494
|
|
|
|
key: test_precision
|
|
value: [0.65625 0.79166667 0.64285714 0.66666667 0.66666667 0.62068966
|
|
0.66666667 0.72 0.61290323 0.66666667]
|
|
|
|
mean value: 0.6711033357169341
|
|
|
|
key: train_precision
|
|
value: [0.67078189 0.66801619 0.66935484 0.67611336 0.68049793 0.668
|
|
0.66666667 0.67206478 0.67213115 0.66798419]
|
|
|
|
mean value: 0.6711610992939772
|
|
|
|
key: test_recall
|
|
value: [0.75 0.67857143 0.64285714 0.64285714 0.68965517 0.62068966
|
|
0.64285714 0.64285714 0.67857143 0.5 ]
|
|
|
|
mean value: 0.6488916256157635
|
|
|
|
key: train_recall
|
|
value: [0.64173228 0.6496063 0.65354331 0.65748031 0.64822134 0.66007905
|
|
0.65354331 0.65354331 0.64566929 0.66535433]
|
|
|
|
mean value: 0.6528772836201798
|
|
|
|
key: test_roc_auc
|
|
value: [0.80357143 0.80681818 0.75649351 0.76298701 0.77903811 0.73797641
|
|
0.76221805 0.77537594 0.76033835 0.70394737]
|
|
|
|
mean value: 0.7648764348174512
|
|
|
|
key: train_roc_auc
|
|
value: [0.76264198 0.76512338 0.76709189 0.77051599 0.76815137 0.76971976
|
|
0.76645189 0.76790537 0.76469511 0.77163065]
|
|
|
|
mean value: 0.7673927395048193
|
|
|
|
key: test_jcc
|
|
value: [0.53846154 0.57575758 0.47368421 0.48648649 0.51282051 0.45
|
|
0.48648649 0.51428571 0.475 0.4 ]
|
|
|
|
mean value: 0.491298252482463
|
|
|
|
key: train_jcc
|
|
value: [0.48802395 0.49107143 0.49404762 0.5 0.4969697 0.49702381
|
|
0.4925816 0.49552239 0.49101796 0.5 ]
|
|
|
|
mean value: 0.49462584607138077
|
|
|
|
MCC on Blind test: 0.45
|
|
|
|
Accuracy on Blind test: 0.79
|
|
|
|
Model_name: Passive Aggresive
|
|
Model func: PassiveAggressiveClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'mcsm_ppi2_affinity',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=169)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
PassiveAggressiveClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02959919 0.02257371 0.02463174 0.02462006 0.02352881 0.02986574
|
|
0.02051759 0.02791381 0.02691722 0.03262067]
|
|
|
|
mean value: 0.02627885341644287
|
|
|
|
key: score_time
|
|
value: [0.01230764 0.01232553 0.0123086 0.01223731 0.01227355 0.01228428
|
|
0.01223445 0.01227784 0.01229286 0.01232219]
|
|
|
|
mean value: 0.01228642463684082
|
|
|
|
key: test_mcc
|
|
value: [0.42663333 0.77043179 0.53542695 0.39506237 0.47919051 0.44931641
|
|
0.2306974 0.4738791 0.60637959 0.49160514]
|
|
|
|
mean value: 0.4858622593919944
|
|
|
|
key: train_mcc
|
|
value: [0.52392759 0.69247445 0.60980007 0.52384574 0.47063674 0.60859336
|
|
0.24486604 0.67256322 0.71169252 0.47563878]
|
|
|
|
mean value: 0.5534038507590235
|
|
|
|
key: test_accuracy
|
|
value: [0.8 0.9047619 0.77142857 0.79047619 0.80952381 0.8
|
|
0.75 0.80769231 0.82692308 0.81730769]
|
|
|
|
mean value: 0.8078113553113553
|
|
|
|
key: train_accuracy
|
|
value: [0.82359192 0.87035069 0.78639745 0.82571732 0.80871413 0.8544102
|
|
0.75265393 0.87473461 0.88322718 0.8089172 ]
|
|
|
|
mean value: 0.8288714630277678
|
|
|
|
key: test_fscore
|
|
value: [0.46153846 0.83333333 0.66666667 0.38888889 0.52380952 0.53333333
|
|
0.13333333 0.58333333 0.71875 0.53658537]
|
|
|
|
mean value: 0.5379572240090532
|
|
|
|
key: train_fscore
|
|
value: [0.52298851 0.77898551 0.70911722 0.54945055 0.46428571 0.65139949
|
|
0.15884477 0.75416667 0.79166667 0.46107784]
|
|
|
|
mean value: 0.584198293222982
|
|
|
|
key: test_precision
|
|
value: [0.81818182 0.78125 0.54545455 0.875 0.84615385 0.75
|
|
1. 0.7 0.63888889 0.84615385]
|
|
|
|
mean value: 0.7801082944832944
|
|
|
|
key: train_precision
|
|
value: [0.96808511 0.72147651 0.56064073 0.90909091 0.93975904 0.91428571
|
|
0.95652174 0.80088496 0.76277372 0.9625 ]
|
|
|
|
mean value: 0.8496018425747125
|
|
|
|
key: test_recall
|
|
value: [0.32142857 0.89285714 0.85714286 0.25 0.37931034 0.4137931
|
|
0.07142857 0.5 0.82142857 0.39285714]
|
|
|
|
mean value: 0.4900246305418719
|
|
|
|
key: train_recall
|
|
value: [0.35826772 0.84645669 0.96456693 0.39370079 0.3083004 0.50592885
|
|
0.08661417 0.71259843 0.82283465 0.30314961]
|
|
|
|
mean value: 0.530241822538981
|
|
|
|
key: test_roc_auc
|
|
value: [0.64772727 0.90097403 0.7987013 0.61850649 0.67649728 0.68058076
|
|
0.53571429 0.71052632 0.82518797 0.68327068]
|
|
|
|
mean value: 0.7077686378956797
|
|
|
|
key: train_roc_auc
|
|
value: [0.67695045 0.86282078 0.84254547 0.68957237 0.65051648 0.7442435
|
|
0.54258034 0.82359572 0.86417895 0.64939457]
|
|
|
|
mean value: 0.7346398637423248
|
|
|
|
key: test_jcc
|
|
value: [0.3 0.71428571 0.5 0.24137931 0.35483871 0.36363636
|
|
0.07142857 0.41176471 0.56097561 0.36666667]
|
|
|
|
mean value: 0.3884975651678014
|
|
|
|
key: train_jcc
|
|
value: [0.3540856 0.6379822 0.54932735 0.37878788 0.30232558 0.48301887
|
|
0.08627451 0.60535117 0.65517241 0.29961089]
|
|
|
|
mean value: 0.43519364704336044
|
|
|
|
MCC on Blind test: 0.19
|
|
|
|
Accuracy on Blind test: 0.74
|
|
|
|
Model_name: Stochastic GDescent
|
|
Model func: SGDClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'mcsm_ppi2_affinity',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=169)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SGDClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.03148866 0.04746366 0.03590059 0.03184628 0.03386998 0.03993583
|
|
0.03780341 0.02914476 0.0624752 0.05556989]
|
|
|
|
mean value: 0.040549826622009275
|
|
|
|
key: score_time
|
|
value: [0.01234508 0.01235318 0.01225686 0.01228309 0.01280737 0.01371312
|
|
0.01594853 0.03264642 0.02191806 0.01278949]
|
|
|
|
mean value: 0.015906119346618654
|
|
|
|
key: test_mcc
|
|
value: [0.52223297 0.21751088 0.63716836 0.63566083 0.61129493 0.22558186
|
|
0.31467931 0.49842509 0.42999076 0.61932499]
|
|
|
|
mean value: 0.4711869978433264
|
|
|
|
key: train_mcc
|
|
value: [0.74082294 0.32069422 0.72078765 0.70272385 0.74852979 0.19834202
|
|
0.39962127 0.67219085 0.4310378 0.70555008]
|
|
|
|
mean value: 0.5640300459708724
|
|
|
|
key: test_accuracy
|
|
value: [0.76190476 0.75238095 0.83809524 0.82857143 0.84761905 0.74285714
|
|
0.76923077 0.81730769 0.79807692 0.81730769]
|
|
|
|
mean value: 0.7973351648351649
|
|
|
|
key: train_accuracy
|
|
value: [0.88310308 0.76833156 0.86822529 0.8607864 0.89904357 0.74601488
|
|
0.7866242 0.87685775 0.79723992 0.85987261]
|
|
|
|
mean value: 0.8346099261976802
|
|
|
|
key: test_fscore
|
|
value: [0.65753425 0.1875 0.73846154 0.73529412 0.71428571 0.12903226
|
|
0.29411765 0.59574468 0.43243243 0.72463768]
|
|
|
|
mean value: 0.520904031653591
|
|
|
|
key: train_fscore
|
|
value: [0.81099656 0.26351351 0.79470199 0.7827529 0.81765835 0.11152416
|
|
0.34951456 0.73148148 0.41945289 0.78360656]
|
|
|
|
mean value: 0.586520296839856
|
|
|
|
key: test_precision
|
|
value: [0.53333333 0.75 0.64864865 0.625 0.74074074 1.
|
|
0.83333333 0.73684211 0.88888889 0.6097561 ]
|
|
|
|
mean value: 0.7366543147769078
|
|
|
|
key: train_precision
|
|
value: [0.7195122 0.92857143 0.68571429 0.67621777 0.79477612 0.9375
|
|
0.98181818 0.88764045 0.92 0.67134831]
|
|
|
|
mean value: 0.8203098739716757
|
|
|
|
key: test_recall
|
|
value: [0.85714286 0.10714286 0.85714286 0.89285714 0.68965517 0.06896552
|
|
0.17857143 0.5 0.28571429 0.89285714]
|
|
|
|
mean value: 0.5330049261083744
|
|
|
|
key: train_recall
|
|
value: [0.92913386 0.15354331 0.94488189 0.92913386 0.84189723 0.05928854
|
|
0.21259843 0.62204724 0.27165354 0.94094488]
|
|
|
|
mean value: 0.5905122778625004
|
|
|
|
key: test_roc_auc
|
|
value: [0.79220779 0.54707792 0.84415584 0.84902597 0.79877495 0.53448276
|
|
0.58270677 0.71710526 0.6362782 0.84116541]
|
|
|
|
mean value: 0.7142980884813915
|
|
|
|
key: train_roc_auc
|
|
value: [0.89760914 0.57458825 0.89238272 0.8823253 0.88097769 0.52891752
|
|
0.60557247 0.79648874 0.63146631 0.88544337]
|
|
|
|
mean value: 0.7575771504068265
|
|
|
|
key: test_jcc
|
|
value: [0.48979592 0.10344828 0.58536585 0.58139535 0.55555556 0.06896552
|
|
0.17241379 0.42424242 0.27586207 0.56818182]
|
|
|
|
mean value: 0.38252265740153046
|
|
|
|
key: train_jcc
|
|
value: [0.68208092 0.15175097 0.65934066 0.64305177 0.69155844 0.05905512
|
|
0.21176471 0.57664234 0.26538462 0.64420485]
|
|
|
|
mean value: 0.45848343965300536
|
|
|
|
MCC on Blind test: 0.62
|
|
|
|
Accuracy on Blind test: 0.84
|
|
|
|
Model_name: AdaBoost Classifier
|
|
Model func: AdaBoostClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'mcsm_ppi2_affinity',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=169)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', AdaBoostClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.30645943 0.29946876 0.29675651 0.29512572 0.29090285 0.29409957
|
|
0.30035543 0.30036378 0.29486942 0.29444981]
|
|
|
|
mean value: 0.2972851276397705
|
|
|
|
key: score_time
|
|
value: [0.01755786 0.01698351 0.01792598 0.0173254 0.01617551 0.01720262
|
|
0.01784039 0.01638985 0.01726007 0.01617026]
|
|
|
|
mean value: 0.017083144187927245
|
|
|
|
key: test_mcc
|
|
value: [0.69873073 0.72476485 0.72364444 0.70186874 0.81430264 0.77478236
|
|
0.7556391 0.73441518 0.83665149 0.70087664]
|
|
|
|
mean value: 0.7465676162904644
|
|
|
|
key: train_mcc
|
|
value: [0.87026808 0.90093262 0.90232484 0.90912022 0.87421303 0.9048829
|
|
0.87316902 0.90663783 0.85811026 0.87668181]
|
|
|
|
mean value: 0.8876340605111953
|
|
|
|
key: test_accuracy
|
|
value: [0.86666667 0.8952381 0.88571429 0.88571429 0.92380952 0.9047619
|
|
0.90384615 0.89423077 0.93269231 0.88461538]
|
|
|
|
mean value: 0.8977289377289377
|
|
|
|
key: train_accuracy
|
|
value: [0.94899044 0.96068013 0.96174283 0.96386823 0.95005313 0.96280553
|
|
0.95010616 0.96284501 0.94373673 0.95116773]
|
|
|
|
mean value: 0.9555995902628771
|
|
|
|
key: test_fscore
|
|
value: [0.78125 0.79245283 0.8 0.77777778 0.86666667 0.83870968
|
|
0.82142857 0.80701754 0.88135593 0.77777778]
|
|
|
|
mean value: 0.8144436777321867
|
|
|
|
key: train_fscore
|
|
value: [0.90513834 0.92787524 0.92828685 0.93385214 0.90838207 0.93013972
|
|
0.90729783 0.93203883 0.89668616 0.91015625]
|
|
|
|
mean value: 0.9179853438259079
|
|
|
|
key: test_precision
|
|
value: [0.69444444 0.84 0.75 0.80769231 0.83870968 0.78787879
|
|
0.82142857 0.79310345 0.83870968 0.80769231]
|
|
|
|
mean value: 0.7979659222250991
|
|
|
|
key: train_precision
|
|
value: [0.90873016 0.91891892 0.93951613 0.92307692 0.89615385 0.93951613
|
|
0.90909091 0.91954023 0.88803089 0.90310078]
|
|
|
|
mean value: 0.9145674907145016
|
|
|
|
key: test_recall
|
|
value: [0.89285714 0.75 0.85714286 0.75 0.89655172 0.89655172
|
|
0.82142857 0.82142857 0.92857143 0.75 ]
|
|
|
|
mean value: 0.8364532019704434
|
|
|
|
key: train_recall
|
|
value: [0.9015748 0.93700787 0.91732283 0.94488189 0.92094862 0.92094862
|
|
0.90551181 0.94488189 0.90551181 0.91732283]
|
|
|
|
mean value: 0.9215912981233076
|
|
|
|
key: test_roc_auc
|
|
value: [0.875 0.84902597 0.87662338 0.84253247 0.91538113 0.90222323
|
|
0.87781955 0.8712406 0.93139098 0.84210526]
|
|
|
|
mean value: 0.8783342564876141
|
|
|
|
key: train_roc_auc
|
|
value: [0.93404795 0.95322009 0.94774439 0.9578849 0.94085222 0.94957315
|
|
0.93604079 0.95717932 0.93168032 0.94049281]
|
|
|
|
mean value: 0.9448715943580216
|
|
|
|
key: test_jcc
|
|
value: [0.64102564 0.65625 0.66666667 0.63636364 0.76470588 0.72222222
|
|
0.6969697 0.67647059 0.78787879 0.63636364]
|
|
|
|
mean value: 0.6884916758078523
|
|
|
|
key: train_jcc
|
|
value: [0.8267148 0.86545455 0.866171 0.87591241 0.83214286 0.86940299
|
|
0.83032491 0.87272727 0.81272085 0.83512545]
|
|
|
|
mean value: 0.8486697080152446
|
|
|
|
MCC on Blind test: 0.72
|
|
|
|
Accuracy on Blind test: 0.88
|
|
|
|
Model_name: Bagging Classifier
|
|
Model func: BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'mcsm_ppi2_affinity',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=169)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
BaggingClassifier(n_jobs=10, oob_score=True,
|
|
random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.21935773 0.24041748 0.23670745 0.2309885 0.23893952 0.23633647
|
|
0.23115873 0.23125362 0.22829819 0.13062811]
|
|
|
|
mean value: 0.22240858078002929
|
|
|
|
key: score_time
|
|
value: [0.04609227 0.04208827 0.04549074 0.04223323 0.03452539 0.04498839
|
|
0.03371668 0.04103255 0.03953862 0.03698277]
|
|
|
|
mean value: 0.0406688928604126
|
|
|
|
key: test_mcc
|
|
value: [0.73333587 0.69751845 0.66742381 0.77532984 0.85497742 0.65534313
|
|
0.83109768 0.78275643 0.8510645 0.67314671]
|
|
|
|
mean value: 0.7521993852016897
|
|
|
|
key: train_mcc
|
|
value: [0.97839019 0.97839774 0.98650532 0.99460851 0.98647156 0.98918788
|
|
0.98123513 0.97840603 0.97576225 0.99191669]
|
|
|
|
mean value: 0.9840881305567699
|
|
|
|
key: test_accuracy
|
|
value: [0.88571429 0.88571429 0.86666667 0.91428571 0.94285714 0.85714286
|
|
0.93269231 0.91346154 0.94230769 0.875 ]
|
|
|
|
mean value: 0.9015842490842491
|
|
|
|
key: train_accuracy
|
|
value: [0.99149841 0.99149841 0.9946865 0.9978746 0.9946865 0.9957492
|
|
0.992569 0.99150743 0.99044586 0.99681529]
|
|
|
|
mean value: 0.9937331203422298
|
|
|
|
key: test_fscore
|
|
value: [0.80645161 0.76923077 0.75862069 0.83018868 0.89285714 0.75409836
|
|
0.87719298 0.84210526 0.88888889 0.75471698]
|
|
|
|
mean value: 0.8174351370182331
|
|
|
|
key: train_fscore
|
|
value: [0.98418972 0.98406375 0.99013807 0.99604743 0.99009901 0.99209486
|
|
0.98630137 0.98406375 0.98196393 0.99405941]
|
|
|
|
mean value: 0.988302128647157
|
|
|
|
key: test_precision
|
|
value: [0.73529412 0.83333333 0.73333333 0.88 0.92592593 0.71875
|
|
0.86206897 0.82758621 0.92307692 0.8 ]
|
|
|
|
mean value: 0.8239368805730367
|
|
|
|
key: train_precision
|
|
value: [0.98809524 0.99596774 0.99209486 1. 0.99206349 0.99209486
|
|
0.98054475 0.99596774 1. 1. ]
|
|
|
|
mean value: 0.9936828684431568
|
|
|
|
key: test_recall
|
|
value: [0.89285714 0.71428571 0.78571429 0.78571429 0.86206897 0.79310345
|
|
0.89285714 0.85714286 0.85714286 0.71428571]
|
|
|
|
mean value: 0.8155172413793104
|
|
|
|
key: train_recall
|
|
value: [0.98031496 0.97244094 0.98818898 0.99212598 0.98814229 0.99209486
|
|
0.99212598 0.97244094 0.96456693 0.98818898]
|
|
|
|
mean value: 0.98306308549376
|
|
|
|
key: test_roc_auc
|
|
value: [0.88798701 0.83116883 0.84090909 0.87337662 0.91787659 0.8373412
|
|
0.92011278 0.89567669 0.91541353 0.82424812]
|
|
|
|
mean value: 0.8744110472105028
|
|
|
|
key: train_roc_auc
|
|
value: [0.98797407 0.98549267 0.99263888 0.99606299 0.99261766 0.99459394
|
|
0.99242927 0.98549373 0.98228346 0.99409449]
|
|
|
|
mean value: 0.990368117338126
|
|
|
|
key: test_jcc
|
|
value: [0.67567568 0.625 0.61111111 0.70967742 0.80645161 0.60526316
|
|
0.78125 0.72727273 0.8 0.60606061]
|
|
|
|
mean value: 0.6947762310272921
|
|
|
|
key: train_jcc
|
|
value: [0.9688716 0.96862745 0.98046875 0.99212598 0.98039216 0.98431373
|
|
0.97297297 0.96862745 0.96456693 0.98818898]
|
|
|
|
mean value: 0.9769155992381218
|
|
|
|
MCC on Blind test: 0.62
|
|
|
|
Accuracy on Blind test: 0.85
|
|
|
|
Model_name: Gaussian Process
|
|
Model func: GaussianProcessClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'mcsm_ppi2_affinity',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=169)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianProcessClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.58218765 0.37049866 0.41866732 0.38032222 0.44648361 0.35902166
|
|
0.36449242 0.36559463 0.36136842 0.56200552]
|
|
|
|
mean value: 0.4210642099380493
|
|
|
|
key: score_time
|
|
value: [0.04954362 0.02290463 0.02298665 0.03592324 0.03927064 0.02267504
|
|
0.02226734 0.02231359 0.02263856 0.04020643]
|
|
|
|
mean value: 0.030072975158691406
|
|
|
|
key: test_mcc
|
|
value: [0.46030081 0.39129279 0.39129279 0.43082022 0.41740709 0.40554533
|
|
0.49160514 0.39145415 0.4423253 0.46382494]
|
|
|
|
mean value: 0.42858685529941937
|
|
|
|
key: train_mcc
|
|
value: [0.87301863 0.87241956 0.86423167 0.87490576 0.86449944 0.88897395
|
|
0.8615454 0.85578816 0.87818056 0.86458049]
|
|
|
|
mean value: 0.8698143619421551
|
|
|
|
key: test_accuracy
|
|
value: [0.80952381 0.79047619 0.79047619 0.8 0.79047619 0.78095238
|
|
0.81730769 0.78846154 0.79807692 0.80769231]
|
|
|
|
mean value: 0.7973443223443224
|
|
|
|
key: train_accuracy
|
|
value: [0.95005313 0.95005313 0.94686504 0.95111583 0.94686504 0.95642933
|
|
0.94585987 0.94373673 0.9522293 0.94692144]
|
|
|
|
mean value: 0.9490128855105131
|
|
|
|
key: test_fscore
|
|
value: [0.5 0.42105263 0.42105263 0.43243243 0.5 0.53061224
|
|
0.53658537 0.45 0.55319149 0.47368421]
|
|
|
|
mean value: 0.48186110062299625
|
|
|
|
key: train_fscore
|
|
value: [0.89848812 0.8993576 0.89224138 0.90212766 0.89082969 0.91220557
|
|
0.88984881 0.88552916 0.90364026 0.89177489]
|
|
|
|
mean value: 0.8966043141819788
|
|
|
|
key: test_precision
|
|
value: [0.83333333 0.8 0.8 0.88888889 0.73333333 0.65
|
|
0.84615385 0.75 0.68421053 0.9 ]
|
|
|
|
mean value: 0.7885919928025191
|
|
|
|
key: train_precision
|
|
value: [0.99521531 0.98591549 0.98571429 0.98148148 0.99512195 0.9953271
|
|
0.98564593 0.98086124 0.99061033 0.99038462]
|
|
|
|
mean value: 0.9886277746238155
|
|
|
|
key: test_recall
|
|
value: [0.35714286 0.28571429 0.28571429 0.28571429 0.37931034 0.44827586
|
|
0.39285714 0.32142857 0.46428571 0.32142857]
|
|
|
|
mean value: 0.3541871921182266
|
|
|
|
key: train_recall
|
|
value: [0.81889764 0.82677165 0.81496063 0.83464567 0.80632411 0.84189723
|
|
0.81102362 0.80708661 0.83070866 0.81102362]
|
|
|
|
mean value: 0.8203339454109738
|
|
|
|
key: test_roc_auc
|
|
value: [0.66558442 0.62987013 0.62987013 0.63636364 0.66333938 0.6780853
|
|
0.68327068 0.64097744 0.69266917 0.65413534]
|
|
|
|
mean value: 0.6574165625662903
|
|
|
|
key: train_roc_auc
|
|
value: [0.90872102 0.91120242 0.90529691 0.91441163 0.90243531 0.92022187
|
|
0.90333158 0.90063633 0.91390084 0.90405832]
|
|
|
|
mean value: 0.9084216230219961
|
|
|
|
key: test_jcc
|
|
value: [0.33333333 0.26666667 0.26666667 0.27586207 0.33333333 0.36111111
|
|
0.36666667 0.29032258 0.38235294 0.31034483]
|
|
|
|
mean value: 0.31866601961511337
|
|
|
|
key: train_jcc
|
|
value: [0.81568627 0.81712062 0.80544747 0.82170543 0.80314961 0.83858268
|
|
0.80155642 0.79457364 0.82421875 0.8046875 ]
|
|
|
|
mean value: 0.8126728391360489
|
|
|
|
MCC on Blind test: 0.14
|
|
|
|
Accuracy on Blind test: 0.72
|
|
|
|
Model_name: Gradient Boosting
|
|
Model func: GradientBoostingClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'mcsm_ppi2_affinity',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=169)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GradientBoostingClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.30799556 1.29913497 1.31377244 1.30130267 1.31007338 1.30316687
|
|
1.31127691 1.31758642 1.31151032 1.30925107]
|
|
|
|
mean value: 1.3085070610046388
|
|
|
|
key: score_time
|
|
value: [0.01042414 0.00974035 0.01053309 0.00961423 0.01052761 0.01053381
|
|
0.00991249 0.01054859 0.00976205 0.00986886]
|
|
|
|
mean value: 0.010146522521972656
|
|
|
|
key: test_mcc
|
|
value: [0.75154799 0.8016605 0.74282918 0.77856132 0.92955115 0.68301456
|
|
0.82724889 0.83665149 0.88404351 0.72835937]
|
|
|
|
mean value: 0.7963467959397353
|
|
|
|
key: train_mcc
|
|
value: [0.98650062 0.9892024 0.99460851 0.97570666 0.99189326 0.97833596
|
|
0.9865106 0.99190781 0.98654405 0.98650586]
|
|
|
|
mean value: 0.9867715724004553
|
|
|
|
key: test_accuracy
|
|
value: [0.8952381 0.92380952 0.8952381 0.91428571 0.97142857 0.86666667
|
|
0.93269231 0.93269231 0.95192308 0.89423077]
|
|
|
|
mean value: 0.9178205128205128
|
|
|
|
key: train_accuracy
|
|
value: [0.9946865 0.9957492 0.9978746 0.99043571 0.9968119 0.99149841
|
|
0.99469214 0.99681529 0.99469214 0.99469214]
|
|
|
|
mean value: 0.9947948042805796
|
|
|
|
key: test_fscore
|
|
value: [0.81967213 0.85185185 0.81355932 0.83636364 0.94915254 0.77419355
|
|
0.87272727 0.88135593 0.91525424 0.8 ]
|
|
|
|
mean value: 0.8514130474375704
|
|
|
|
key: train_fscore
|
|
value: [0.99009901 0.99209486 0.99604743 0.98224852 0.99403579 0.98412698
|
|
0.99013807 0.99408284 0.99017682 0.99009901]
|
|
|
|
mean value: 0.9903149327004045
|
|
|
|
key: test_precision
|
|
value: [0.75757576 0.88461538 0.77419355 0.85185185 0.93333333 0.72727273
|
|
0.88888889 0.83870968 0.87096774 0.81481481]
|
|
|
|
mean value: 0.8342223726094694
|
|
|
|
key: train_precision
|
|
value: [0.99601594 0.99603175 1. 0.98418972 1. 0.98804781
|
|
0.99209486 0.99604743 0.98823529 0.99601594]
|
|
|
|
mean value: 0.993667873723457
|
|
|
|
key: test_recall
|
|
value: [0.89285714 0.82142857 0.85714286 0.82142857 0.96551724 0.82758621
|
|
0.85714286 0.92857143 0.96428571 0.78571429]
|
|
|
|
mean value: 0.8721674876847291
|
|
|
|
key: train_recall
|
|
value: [0.98425197 0.98818898 0.99212598 0.98031496 0.98814229 0.98023715
|
|
0.98818898 0.99212598 0.99212598 0.98425197]
|
|
|
|
mean value: 0.9869954249789923
|
|
|
|
key: test_roc_auc
|
|
value: [0.89448052 0.89123377 0.88311688 0.88474026 0.96960073 0.85458258
|
|
0.90883459 0.93139098 0.95582707 0.85996241]
|
|
|
|
mean value: 0.9033769769250714
|
|
|
|
key: train_roc_auc
|
|
value: [0.99139818 0.99336669 0.99606299 0.98724627 0.99407115 0.98793834
|
|
0.992641 0.99533625 0.99388276 0.99139924]
|
|
|
|
mean value: 0.9923342870807421
|
|
|
|
key: test_jcc
|
|
value: [0.69444444 0.74193548 0.68571429 0.71875 0.90322581 0.63157895
|
|
0.77419355 0.78787879 0.84375 0.66666667]
|
|
|
|
mean value: 0.7448137970782283
|
|
|
|
key: train_jcc
|
|
value: [0.98039216 0.98431373 0.99212598 0.96511628 0.98814229 0.96875
|
|
0.98046875 0.98823529 0.98054475 0.98039216]
|
|
|
|
mean value: 0.9808481386226899
|
|
|
|
MCC on Blind test: 0.63
|
|
|
|
Accuracy on Blind test: 0.85
|
|
|
|
Model_name: QDA
|
|
Model func: QuadraticDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'mcsm_ppi2_affinity',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=169)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', QuadraticDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.04931426 0.03909922 0.04333639 0.03963757 0.03972554 0.04711246
|
|
0.03983569 0.04689312 0.04018211 0.03991532]
|
|
|
|
mean value: 0.04250516891479492
|
|
|
|
key: score_time
|
|
value: [0.01280475 0.01316428 0.01401424 0.0142684 0.02189922 0.01407385
|
|
0.01594543 0.01312542 0.01410413 0.01420999]
|
|
|
|
mean value: 0.014760971069335938
|
|
|
|
key: test_mcc
|
|
value: [ 0.14892085 0.0489116 0.05967159 -0.06154575 0.10285966 0.127851
|
|
0.13826318 0.05090762 0.03507633 -0.05932989]
|
|
|
|
mean value: 0.05915861832390828
|
|
|
|
key: train_mcc
|
|
value: [0.19284335 0.18407652 0.25206948 0.18534544 0.18358025 0.18230893
|
|
0.1812842 0.1950451 0.19626512 0.1950451 ]
|
|
|
|
mean value: 0.19478634814967297
|
|
|
|
key: test_accuracy
|
|
value: [0.36190476 0.32380952 0.37142857 0.31428571 0.36190476 0.35238095
|
|
0.35576923 0.32692308 0.29807692 0.31730769]
|
|
|
|
mean value: 0.33837912087912086
|
|
|
|
key: train_accuracy
|
|
value: [0.36131775 0.35387885 0.41657811 0.35494155 0.35281615 0.35175345
|
|
0.35138004 0.36305732 0.3641189 0.36305732]
|
|
|
|
mean value: 0.3632899454210297
|
|
|
|
key: test_fscore
|
|
value: [0.44628099 0.42276423 0.42105263 0.38983051 0.44628099 0.4516129
|
|
0.44628099 0.42622951 0.42519685 0.39316239]
|
|
|
|
mean value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
0.42686919978810334
|
|
|
|
key: train_fscore
|
|
value: [0.45807033 0.45519713 0.48060549 0.45560538 0.45381166 0.45340502
|
|
0.45397676 0.45848375 0.45889792 0.45848375]
|
|
|
|
mean value: 0.45865372080648364
|
|
|
|
key: test_precision
|
|
value: [0.29032258 0.27368421 0.27906977 0.25555556 0.29347826 0.29473684
|
|
0.29032258 0.27659574 0.27272727 0.25842697]
|
|
|
|
mean value: 0.27849197814891413
|
|
|
|
key: train_precision
|
|
value: [0.29707602 0.29466357 0.31631382 0.29500581 0.29350348 0.29316338
|
|
0.29364162 0.29742389 0.29777257 0.29742389]
|
|
|
|
mean value: 0.2975988051747818
|
|
|
|
key: test_recall
|
|
value: [0.96428571 0.92857143 0.85714286 0.82142857 0.93103448 0.96551724
|
|
0.96428571 0.92857143 0.96428571 0.82142857]
|
|
|
|
mean value: 0.9146551724137931
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.55357143 0.51623377 0.52597403 0.47564935 0.53788566 0.54196915
|
|
0.54793233 0.51691729 0.50845865 0.47650376]
|
|
|
|
mean value: 0.5201095410941146
|
|
|
|
key: train_roc_auc
|
|
value: [0.56259098 0.55749636 0.60043668 0.55822416 0.55741279 0.55668605
|
|
0.5559593 0.56395349 0.56468023 0.56395349]
|
|
|
|
mean value: 0.5641393529332115
|
|
|
|
key: test_jcc
|
|
value: [0.28723404 0.26804124 0.26666667 0.24210526 0.28723404 0.29166667
|
|
0.28723404 0.27083333 0.27 0.24468085]
|
|
|
|
mean value: 0.27156961456613676
|
|
|
|
key: train_jcc
|
|
value: [0.29707602 0.29466357 0.31631382 0.29500581 0.29350348 0.29316338
|
|
0.29364162 0.29742389 0.29777257 0.29742389]
|
|
|
|
mean value: 0.2975988051747818
|
|
|
|
MCC on Blind test: 0.06
|
|
|
|
Accuracy on Blind test: 0.34
|
|
|
|
Model_name: Ridge Classifier
|
|
Model func: RidgeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'mcsm_ppi2_affinity',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=169)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01950979 0.02391148 0.01934743 0.01937056 0.01945186 0.01872849
|
|
0.04591155 0.03032136 0.02094102 0.05050707]
|
|
|
|
mean value: 0.026800060272216798
|
|
|
|
key: score_time
|
|
value: [0.02959514 0.01259971 0.01250029 0.01246119 0.01270914 0.0124867
|
|
0.0194819 0.02002406 0.01261711 0.01999617]
|
|
|
|
mean value: 0.01644713878631592
|
|
|
|
key: test_mcc
|
|
value: [0.67005939 0.74792687 0.54365409 0.43041075 0.5832648 0.61887477
|
|
0.75093926 0.5046982 0.50490733 0.6167457 ]
|
|
|
|
mean value: 0.5971481155120297
|
|
|
|
key: train_mcc
|
|
value: [0.69387713 0.69502378 0.70853649 0.70417122 0.71255044 0.71883821
|
|
0.67735741 0.70257412 0.72492143 0.70489009]
|
|
|
|
mean value: 0.7042740304422997
|
|
|
|
key: test_accuracy
|
|
value: [0.85714286 0.9047619 0.82857143 0.79047619 0.83809524 0.84761905
|
|
0.90384615 0.81730769 0.79807692 0.85576923]
|
|
|
|
mean value: 0.8441666666666666
|
|
|
|
key: train_accuracy
|
|
value: [0.88204038 0.88204038 0.88735388 0.88522848 0.88841658 0.89160468
|
|
0.87579618 0.88535032 0.89278132 0.88535032]
|
|
|
|
mean value: 0.8855962509955755
|
|
|
|
key: test_fscore
|
|
value: [0.76190476 0.8 0.65384615 0.56 0.69090909 0.72413793
|
|
0.81481481 0.6122449 0.6440678 0.70588235]
|
|
|
|
mean value: 0.6967807800019834
|
|
|
|
key: train_fscore
|
|
value: [0.77207392 0.77393075 0.78367347 0.78137652 0.78787879 0.79098361
|
|
0.75975359 0.77868852 0.79759519 0.78225806]
|
|
|
|
mean value: 0.7808212430494164
|
|
|
|
key: test_precision
|
|
value: [0.68571429 0.90909091 0.70833333 0.63636364 0.73076923 0.72413793
|
|
0.84615385 0.71428571 0.61290323 0.7826087 ]
|
|
|
|
mean value: 0.7350360808204064
|
|
|
|
key: train_precision
|
|
value: [0.80686695 0.80168776 0.81355932 0.80416667 0.80578512 0.8212766
|
|
0.79399142 0.81196581 0.8122449 0.80165289]
|
|
|
|
mean value: 0.807319744371096
|
|
|
|
key: test_recall
|
|
value: [0.85714286 0.71428571 0.60714286 0.5 0.65517241 0.72413793
|
|
0.78571429 0.53571429 0.67857143 0.64285714]
|
|
|
|
mean value: 0.6700738916256157
|
|
|
|
key: train_recall
|
|
value: [0.74015748 0.7480315 0.75590551 0.75984252 0.77075099 0.76284585
|
|
0.72834646 0.7480315 0.78346457 0.76377953]
|
|
|
|
mean value: 0.7561155893062774
|
|
|
|
key: test_roc_auc
|
|
value: [0.85714286 0.84415584 0.75811688 0.69805195 0.78153358 0.80943739
|
|
0.86654135 0.72838346 0.76033835 0.78853383]
|
|
|
|
mean value: 0.7892235486836213
|
|
|
|
key: train_roc_auc
|
|
value: [0.83732765 0.83980905 0.84592947 0.84571456 0.85121852 0.85089967
|
|
0.82928951 0.842039 0.85830205 0.84700604]
|
|
|
|
mean value: 0.8447535522442832
|
|
|
|
key: test_jcc
|
|
value: [0.61538462 0.66666667 0.48571429 0.38888889 0.52777778 0.56756757
|
|
0.6875 0.44117647 0.475 0.54545455]
|
|
|
|
mean value: 0.5401130818042583
|
|
|
|
key: train_jcc
|
|
value: [0.62876254 0.63122924 0.6442953 0.64119601 0.65 0.65423729
|
|
0.61258278 0.63758389 0.66333333 0.64238411]
|
|
|
|
mean value: 0.6405604494492473
|
|
|
|
MCC on Blind test: 0.5
|
|
|
|
Accuracy on Blind test: 0.81
|
|
|
|
Model_name: Ridge ClassifierCV
|
|
Model func: RidgeClassifierCV(cv=10)
|
|
List of models: /home/tanu/git/LSHTM_analysis/scripts/ml/./rpob_cd_sl.py:115: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
baseline_CT.sort_values(by = ['test_mcc'], ascending = False, inplace = True)
|
|
/home/tanu/git/LSHTM_analysis/scripts/ml/./rpob_cd_sl.py:118: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
baseline_BT.sort_values(by = ['bts_mcc'], ascending = False, inplace = True)
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'mcsm_ppi2_affinity',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=169)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifierCV(cv=10))])
|
|
|
|
key: fit_time
|
|
value: [0.35018015 0.39355612 0.24602962 0.39055872 0.31069398 0.47747779
|
|
0.38101101 0.38233376 0.36280727 0.28640795]
|
|
|
|
mean value: 0.35810563564300535
|
|
|
|
key: score_time
|
|
value: [0.01960063 0.0257833 0.01804161 0.0270741 0.02642536 0.02735519
|
|
0.01959705 0.02075911 0.01942539 0.01252222]
|
|
|
|
mean value: 0.02165839672088623
|
|
|
|
key: test_mcc
|
|
value: [0.50676059 0.6947088 0.54365409 0.43041075 0.5832648 0.56255844
|
|
0.75093926 0.5046982 0.50490733 0.6167457 ]
|
|
|
|
mean value: 0.5698647943530205
|
|
|
|
key: train_mcc
|
|
value: [0.66406582 0.71269911 0.70853649 0.70417122 0.71255044 0.73342438
|
|
0.67735741 0.70257412 0.72492143 0.70489009]
|
|
|
|
mean value: 0.7045190496417376
|
|
|
|
key: test_accuracy
|
|
value: [0.8 0.88571429 0.82857143 0.79047619 0.83809524 0.82857143
|
|
0.90384615 0.81730769 0.79807692 0.85576923]
|
|
|
|
mean value: 0.8346428571428571
|
|
|
|
key: train_accuracy
|
|
value: [0.86928799 0.88841658 0.88735388 0.88522848 0.88841658 0.89691817
|
|
0.87579618 0.88535032 0.89278132 0.88535032]
|
|
|
|
mean value: 0.8854899810699645
|
|
|
|
key: test_fscore
|
|
value: [0.6440678 0.75 0.65384615 0.56 0.69090909 0.67857143
|
|
0.81481481 0.6122449 0.6440678 0.70588235]
|
|
|
|
mean value: 0.6754404332262187
|
|
|
|
key: train_fscore
|
|
value: [0.75251509 0.78787879 0.78367347 0.78137652 0.78787879 0.80244399
|
|
0.75975359 0.77868852 0.79759519 0.78225806]
|
|
|
|
mean value: 0.7814062018676787
|
|
|
|
key: test_precision
|
|
value: [0.61290323 0.9 0.70833333 0.63636364 0.73076923 0.7037037
|
|
0.84615385 0.71428571 0.61290323 0.7826087 ]
|
|
|
|
mean value: 0.7248024611874542
|
|
|
|
key: train_precision
|
|
value: [0.76954733 0.80912863 0.81355932 0.80416667 0.80578512 0.82773109
|
|
0.79399142 0.81196581 0.8122449 0.80165289]
|
|
|
|
mean value: 0.8049773179708749
|
|
|
|
key: test_recall
|
|
value: [0.67857143 0.64285714 0.60714286 0.5 0.65517241 0.65517241
|
|
0.78571429 0.53571429 0.67857143 0.64285714]
|
|
|
|
mean value: 0.6381773399014778
|
|
|
|
key: train_recall
|
|
value: [0.73622047 0.76771654 0.75590551 0.75984252 0.77075099 0.77865613
|
|
0.72834646 0.7480315 0.78346457 0.76377953]
|
|
|
|
mean value: 0.759271420123868
|
|
|
|
key: test_roc_auc
|
|
value: [0.76136364 0.80844156 0.75811688 0.69805195 0.78153358 0.77495463
|
|
0.86654135 0.72838346 0.76033835 0.78853383]
|
|
|
|
mean value: 0.7726259221722016
|
|
|
|
key: train_roc_auc
|
|
value: [0.82735332 0.85037937 0.84592947 0.84571456 0.85121852 0.85953155
|
|
0.82928951 0.842039 0.85830205 0.84700604]
|
|
|
|
mean value: 0.8456763400338374
|
|
|
|
key: test_jcc
|
|
value: [0.475 0.6 0.48571429 0.38888889 0.52777778 0.51351351
|
|
0.6875 0.44117647 0.475 0.54545455]
|
|
|
|
mean value: 0.5140025481937247
|
|
|
|
key: train_jcc
|
|
value: [0.60322581 0.65 0.6442953 0.64119601 0.65 0.67006803
|
|
0.61258278 0.63758389 0.66333333 0.64238411]
|
|
|
|
mean value: 0.6414669262332958
|
|
|
|
MCC on Blind test: 0.5
|
|
|
|
Accuracy on Blind test: 0.81
|
|
|
|
Model_name: Logistic Regression
|
|
Model func: LogisticRegression(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'mcsm_ppi2_affinity',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=169)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegression(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.06039929 0.04807615 0.04720998 0.04900336 0.05667543 0.05436158
|
|
0.04790497 0.05956435 0.04734159 0.05777669]
|
|
|
|
mean value: 0.05283133983612061
|
|
|
|
key: score_time
|
|
value: [0.01315904 0.01270175 0.01343322 0.01349378 0.01343918 0.01361322
|
|
0.01411629 0.0125556 0.01264167 0.01348758]
|
|
|
|
mean value: 0.013264131546020509
|
|
|
|
key: test_mcc
|
|
value: [0.72887572 0.83014354 0.76717745 0.79199494 0.7283738 0.82137062
|
|
0.83345743 0.83345743 0.7228974 0.80437423]
|
|
|
|
mean value: 0.786212257963307
|
|
|
|
key: train_mcc
|
|
value: [0.82904315 0.80723925 0.82819649 0.81781545 0.81715952 0.81918451
|
|
0.81737959 0.81246329 0.82468571 0.82199066]
|
|
|
|
mean value: 0.8195157634655642
|
|
|
|
key: test_accuracy
|
|
value: [0.85620915 0.91503268 0.88235294 0.89542484 0.8627451 0.90849673
|
|
0.91503268 0.91503268 0.85526316 0.90131579]
|
|
|
|
mean value: 0.8906905744754042
|
|
|
|
key: train_accuracy
|
|
value: [0.91272727 0.90181818 0.91272727 0.90690909 0.90690909 0.90763636
|
|
0.90690909 0.90472727 0.91061047 0.90915698]
|
|
|
|
mean value: 0.9080131078224102
|
|
|
|
key: test_fscore
|
|
value: [0.86904762 0.91503268 0.88607595 0.8974359 0.86956522 0.91358025
|
|
0.91925466 0.91925466 0.86746988 0.9044586 ]
|
|
|
|
mean value: 0.8961175404908425
|
|
|
|
key: train_fscore
|
|
value: [0.91666667 0.90631506 0.91620112 0.91135734 0.91086351 0.91186676
|
|
0.91098748 0.90858339 0.91452397 0.91325468]
|
|
|
|
mean value: 0.9120619985967259
|
|
|
|
key: test_precision
|
|
value: [0.79347826 0.90909091 0.85365854 0.875 0.83333333 0.87058824
|
|
0.88095238 0.88095238 0.8 0.87654321]
|
|
|
|
mean value: 0.8573597246954596
|
|
|
|
key: train_precision
|
|
value: [0.87765957 0.86719788 0.88172043 0.87037037 0.87316422 0.87135279
|
|
0.87217044 0.87265416 0.87616511 0.87383798]
|
|
|
|
mean value: 0.8736292943716704
|
|
|
|
key: test_recall
|
|
value: [0.96052632 0.92105263 0.92105263 0.92105263 0.90909091 0.96103896
|
|
0.96103896 0.96103896 0.94736842 0.93421053]
|
|
|
|
mean value: 0.9397470950102529
|
|
|
|
key: train_recall
|
|
value: [0.95930233 0.94912791 0.95348837 0.95639535 0.95196507 0.95633188
|
|
0.95342067 0.94759825 0.95639535 0.95639535]
|
|
|
|
mean value: 0.9540420517247216
|
|
|
|
key: test_roc_auc
|
|
value: [0.85688653 0.91507177 0.88260424 0.89559125 0.86244019 0.90815106
|
|
0.91473001 0.91473001 0.85526316 0.90131579]
|
|
|
|
mean value: 0.8906784005468216
|
|
|
|
key: train_roc_auc
|
|
value: [0.91269338 0.90178375 0.91269761 0.90687307 0.90694184 0.90767175
|
|
0.90694289 0.90475843 0.91061047 0.90915698]
|
|
|
|
mean value: 0.9080130158085373
|
|
|
|
key: test_jcc
|
|
value: [0.76842105 0.84337349 0.79545455 0.81395349 0.76923077 0.84090909
|
|
0.85057471 0.85057471 0.76595745 0.8255814 ]
|
|
|
|
mean value: 0.8124030708018686
|
|
|
|
key: train_jcc
|
|
value: [0.84615385 0.8286802 0.84536082 0.83715013 0.83631714 0.8380102
|
|
0.83652618 0.83248082 0.8425096 0.8403576 ]
|
|
|
|
mean value: 0.8383546542619129
|
|
|
|
MCC on Blind test: 0.66
|
|
|
|
Accuracy on Blind test: 0.85
|
|
|
|
Model_name: Logistic RegressionCV
|
|
Model func: LogisticRegressionCV(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'mcsm_ppi2_affinity',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=169)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegressionCV(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.13714576 1.22635508 1.3632586 1.09811139 1.30617452 1.14291883
|
|
1.38323951 1.11891127 1.17073083 1.10783911]
|
|
|
|
mean value: 1.205468487739563
|
|
|
|
key: score_time
|
|
value: [0.02156997 0.01548386 0.03107142 0.01506853 0.0154469 0.01549959
|
|
0.0127914 0.0154953 0.01539874 0.01568174]
|
|
|
|
mean value: 0.01735074520111084
|
|
|
|
key: test_mcc
|
|
value: [0.77450597 0.81804296 0.79199494 0.84433845 0.74070108 0.79185327
|
|
0.83176564 0.85797782 0.73820516 0.80437423]
|
|
|
|
mean value: 0.7993759533754411
|
|
|
|
key: train_mcc
|
|
value: [0.84787954 0.87015095 0.85252284 0.85920947 0.83795368 0.87155445
|
|
0.8422385 0.86142257 0.84954108 0.85030697]
|
|
|
|
mean value: 0.8542780035810353
|
|
|
|
key: test_accuracy
|
|
value: [0.88235294 0.90849673 0.89542484 0.92156863 0.86928105 0.89542484
|
|
0.91503268 0.92810458 0.86184211 0.90131579]
|
|
|
|
mean value: 0.8978844169246646
|
|
|
|
key: train_accuracy
|
|
value: [0.92290909 0.93454545 0.92509091 0.92872727 0.91781818 0.93527273
|
|
0.92 0.93018182 0.92369186 0.92369186]
|
|
|
|
mean value: 0.9261929175475687
|
|
|
|
key: test_fscore
|
|
value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[0.8902439 0.90540541 0.8974359 0.92307692 0.875 0.89873418
|
|
0.91823899 0.93081761 0.8742515 0.9044586 ]
|
|
|
|
mean value: 0.9017663005078128
|
|
|
|
key: train_fscore
|
|
value: [0.9255618 0.93617021 0.9278206 0.93098592 0.92070175 0.93674485
|
|
0.92275281 0.93181818 0.92631579 0.92672715]
|
|
|
|
mean value: 0.9285599056381728
|
|
|
|
key: test_precision
|
|
value: [0.82954545 0.93055556 0.875 0.9 0.84337349 0.87654321
|
|
0.8902439 0.90243902 0.8021978 0.87654321]
|
|
|
|
mean value: 0.8726441652857071
|
|
|
|
key: train_precision
|
|
value: [0.89538043 0.91412742 0.89580514 0.90300546 0.88888889 0.91527778
|
|
0.89145183 0.90984743 0.89552239 0.89127517]
|
|
|
|
mean value: 0.9000581953551315
|
|
|
|
key: test_recall
|
|
value: [0.96052632 0.88157895 0.92105263 0.94736842 0.90909091 0.92207792
|
|
0.94805195 0.96103896 0.96052632 0.93421053]
|
|
|
|
mean value: 0.9345522898154477
|
|
|
|
key: train_recall
|
|
value: [0.95784884 0.95930233 0.9622093 0.96075581 0.95487627 0.95924309
|
|
0.95633188 0.95487627 0.95930233 0.96511628]
|
|
|
|
mean value: 0.9589862394637961
|
|
|
|
key: test_roc_auc
|
|
value: [0.88286056 0.90832194 0.89559125 0.92173616 0.86901914 0.89524949
|
|
0.91481545 0.9278879 0.86184211 0.90131579]
|
|
|
|
mean value: 0.897863978127136
|
|
|
|
key: train_roc_auc
|
|
value: [0.92288366 0.93452744 0.92506389 0.92870396 0.91784511 0.93529015
|
|
0.9200264 0.93019976 0.92369186 0.92369186]
|
|
|
|
mean value: 0.9261924105480518
|
|
|
|
key: test_jcc
|
|
value: [0.8021978 0.82716049 0.81395349 0.85714286 0.77777778 0.81609195
|
|
0.84883721 0.87058824 0.77659574 0.8255814 ]
|
|
|
|
mean value: 0.821592695796681
|
|
|
|
key: train_jcc
|
|
value: [0.86143791 0.88 0.86535948 0.87088274 0.85305592 0.88101604
|
|
0.85658409 0.87234043 0.8627451 0.86345904]
|
|
|
|
mean value: 0.8666880740779328
|
|
|
|
MCC on Blind test: 0.59
|
|
|
|
Accuracy on Blind test: 0.83
|
|
|
|
Model_name: Gaussian NB
|
|
Model func: GaussianNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'mcsm_ppi2_affinity',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=169)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01997805 0.01437449 0.01397061 0.01365924 0.01377439 0.01373744
|
|
0.0138514 0.01391315 0.01384664 0.0139327 ]
|
|
|
|
mean value: 0.014503812789916993
|
|
|
|
key: score_time
|
|
value: [0.0132854 0.00999689 0.00957799 0.00955367 0.00962329 0.00958538
|
|
0.00959682 0.00963831 0.0097363 0.00977492]
|
|
|
|
mean value: 0.010036897659301759
|
|
|
|
key: test_mcc
|
|
value: [0.36574659 0.57926645 0.57418184 0.59860588 0.56559738 0.5691572
|
|
0.53024108 0.59750002 0.53300179 0.62500221]
|
|
|
|
mean value: 0.5538300443947374
|
|
|
|
key: train_mcc
|
|
value: [0.58629771 0.55569616 0.55095292 0.55553189 0.55942174 0.56264615
|
|
0.55543438 0.55203321 0.56086267 0.56009127]
|
|
|
|
mean value: 0.5598968097783753
|
|
|
|
key: test_accuracy
|
|
value: [0.67973856 0.77777778 0.78431373 0.79738562 0.77777778 0.78431373
|
|
0.75816993 0.79738562 0.76315789 0.80921053]
|
|
|
|
mean value: 0.7729231166150671
|
|
|
|
key: train_accuracy
|
|
value: [0.78981818 0.77527273 0.77163636 0.77454545 0.776 0.77745455
|
|
0.77454545 0.77381818 0.77688953 0.77688953]
|
|
|
|
mean value: 0.7766869978858351
|
|
|
|
key: test_fscore
|
|
value: [0.64233577 0.73846154 0.76595745 0.78321678 0.75714286 0.78145695
|
|
0.72992701 0.78911565 0.74285714 0.79432624]
|
|
|
|
mean value: 0.75247973832451
|
|
|
|
key: train_fscore
|
|
value: [0.77297722 0.75915822 0.75118859 0.75628931 0.75594295 0.75714286
|
|
0.75590551 0.75835276 0.75769534 0.75883739]
|
|
|
|
mean value: 0.758349015012571
|
|
|
|
key: test_precision
|
|
value: [0.72131148 0.88888889 0.83076923 0.8358209 0.84126984 0.7972973
|
|
0.83333333 0.82857143 0.8125 0.86153846]
|
|
|
|
mean value: 0.8251300852600706
|
|
|
|
key: train_precision
|
|
value: [0.84102564 0.81848739 0.82578397 0.82363014 0.82956522 0.83246073
|
|
0.82332762 0.81333333 0.82901554 0.82564103]
|
|
|
|
mean value: 0.8262270614267214
|
|
|
|
key: test_recall
|
|
value: [0.57894737 0.63157895 0.71052632 0.73684211 0.68831169 0.76623377
|
|
0.64935065 0.75324675 0.68421053 0.73684211]
|
|
|
|
mean value: 0.693609022556391
|
|
|
|
key: train_recall
|
|
value: [0.71511628 0.70784884 0.68895349 0.69912791 0.69432314 0.69432314
|
|
0.69868996 0.71033479 0.69767442 0.70203488]
|
|
|
|
mean value: 0.7008426847432382
|
|
|
|
key: test_roc_auc
|
|
value: [0.67908407 0.77682843 0.78383459 0.79699248 0.77836637 0.78443267
|
|
0.75888585 0.79767601 0.76315789 0.80921053]
|
|
|
|
mean value: 0.7728468899521531
|
|
|
|
key: train_roc_auc
|
|
value: [0.78987255 0.7753218 0.77169654 0.77460034 0.77594064 0.77739413
|
|
0.77449033 0.77377205 0.77688953 0.77688953]
|
|
|
|
mean value: 0.7766867446938154
|
|
|
|
key: test_jcc
|
|
value: [0.47311828 0.58536585 0.62068966 0.64367816 0.6091954 0.64130435
|
|
0.57471264 0.65168539 0.59090909 0.65882353]
|
|
|
|
mean value: 0.6049482356702764
|
|
|
|
key: train_jcc
|
|
value: [0.62996159 0.61180905 0.60152284 0.60809102 0.60764331 0.6091954
|
|
0.60759494 0.61076345 0.60991105 0.61139241]
|
|
|
|
mean value: 0.6107885064722695
|
|
|
|
MCC on Blind test: 0.33
|
|
|
|
Accuracy on Blind test: 0.74
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'mcsm_ppi2_affinity',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=169)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01470137 0.01940155 0.01966357 0.01939082 0.01938176 0.0193758
|
|
0.01938295 0.01939774 0.01914716 0.01937604]
|
|
|
|
mean value: 0.018921875953674318
|
|
|
|
key: score_time
|
|
value: [0.01264715 0.0128417 0.01289129 0.01285219 0.01289725 0.0128932
|
|
0.01286697 0.01283526 0.01289392 0.01288033]
|
|
|
|
mean value: 0.012849926948547363
|
|
|
|
key: test_mcc
|
|
value: [0.5962008 0.62329017 0.63496441 0.73075678 0.71260054 0.63425056
|
|
0.6601162 0.67327409 0.60715823 0.69791237]
|
|
|
|
mean value: 0.657052415810285
|
|
|
|
key: train_mcc
|
|
value: [0.68630036 0.66997226 0.68009938 0.66706243 0.671509 0.66729052
|
|
0.68029451 0.68166853 0.69934143 0.67631874]
|
|
|
|
mean value: 0.6779857153012689
|
|
|
|
key: test_accuracy
|
|
value: [0.79738562 0.81045752 0.81699346 0.8627451 0.85620915 0.81699346
|
|
0.83006536 0.83660131 0.80263158 0.84868421]
|
|
|
|
mean value: 0.8278766769865842
|
|
|
|
key: train_accuracy
|
|
value: [0.84290909 0.83490909 0.84 0.83345455 0.83563636 0.83345455
|
|
0.84 0.84072727 0.84956395 0.83793605]
|
|
|
|
mean value: 0.8388590909090909
|
|
|
|
key: test_fscore
|
|
value: [0.80254777 0.8 0.82051282 0.86956522 0.85897436 0.81578947
|
|
0.83116883 0.83660131 0.81012658 0.84563758]
|
|
|
|
mean value: 0.8290923945792803
|
|
|
|
key: train_fscore
|
|
value: [0.84593438 0.83680805 0.84149856 0.83537024 0.83764368 0.83607731
|
|
0.84218077 0.84255931 0.85139986 0.84082798]
|
|
|
|
mean value: 0.841030013525283
|
|
|
|
key: test_precision
|
|
value: [0.77777778 0.84057971 0.8 0.82352941 0.84810127 0.82666667
|
|
0.83116883 0.84210526 0.7804878 0.8630137 ]
|
|
|
|
mean value: 0.8233430430011774
|
|
|
|
key: train_precision
|
|
value: [0.83053221 0.82788051 0.83428571 0.82645804 0.82695035 0.82253521
|
|
0.83026874 0.83238636 0.84113475 0.82608696]
|
|
|
|
mean value: 0.8298518855214777
|
|
|
|
key: test_recall
|
|
value: [0.82894737 0.76315789 0.84210526 0.92105263 0.87012987 0.80519481
|
|
0.83116883 0.83116883 0.84210526 0.82894737]
|
|
|
|
mean value: 0.8363978127136023
|
|
|
|
key: train_recall
|
|
value: [0.8619186 0.84593023 0.84883721 0.84447674 0.84861718 0.85007278
|
|
0.85443959 0.85298399 0.8619186 0.85610465]
|
|
|
|
mean value: 0.8525299583629532
|
|
|
|
key: test_roc_auc
|
|
value: [0.79759057 0.81015038 0.81715653 0.86312372 0.85611757 0.81707109
|
|
0.8300581 0.83663705 0.80263158 0.84868421]
|
|
|
|
mean value: 0.827922077922078
|
|
|
|
key: train_roc_auc
|
|
value: [0.84289526 0.83490107 0.83999357 0.83344652 0.8356458 0.83346662
|
|
0.84001049 0.84073618 0.84956395 0.83793605]
|
|
|
|
mean value: 0.8388595511323246
|
|
|
|
key: test_jcc
|
|
value: [0.67021277 0.66666667 0.69565217 0.76923077 0.75280899 0.68888889
|
|
0.71111111 0.71910112 0.68085106 0.73255814]
|
|
|
|
mean value: 0.7087081691492148
|
|
|
|
key: train_jcc
|
|
value: [0.73300371 0.71940667 0.72636816 0.71728395 0.72064277 0.71832718
|
|
0.72738538 0.72795031 0.74125 0.72536946]
|
|
|
|
mean value: 0.7256987591762736
|
|
|
|
MCC on Blind test: 0.44
|
|
|
|
Accuracy on Blind test: 0.77
|
|
|
|
Model_name: K-Nearest Neighbors
|
|
Model func: KNeighborsClassifier()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'mcsm_ppi2_affinity',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=169)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', KNeighborsClassifier())])
|
|
|
|
key: fit_time
|
|
value: [0.01907301 0.01258779 0.01267409 0.01302505 0.01255798 0.01431584
|
|
0.01402307 0.01441479 0.01424098 0.01382494]
|
|
|
|
mean value: 0.014073753356933593
|
|
|
|
key: score_time
|
|
value: [0.03976297 0.024369 0.02204871 0.02038503 0.0180347 0.01813459
|
|
0.01796532 0.01817608 0.01799345 0.02331591]
|
|
|
|
mean value: 0.022018575668334962
|
|
|
|
key: test_mcc
|
|
value: [0.70444953 0.75226544 0.76499745 0.6984034 0.6898344 0.65942575
|
|
0.73952007 0.80448212 0.64605828 0.7579814 ]
|
|
|
|
mean value: 0.7217417845136312
|
|
|
|
key: train_mcc
|
|
value: [0.80526487 0.79577359 0.78395341 0.79356143 0.80103248 0.80798284
|
|
0.79018652 0.80164773 0.81435994 0.79571032]
|
|
|
|
mean value: 0.7989473123845814
|
|
|
|
key: test_accuracy
|
|
value: [0.8496732 0.87581699 0.88235294 0.84313725 0.84313725 0.82352941
|
|
0.8627451 0.90196078 0.81578947 0.875 ]
|
|
|
|
mean value: 0.8573142414860682
|
|
|
|
key: train_accuracy
|
|
value: [0.90036364 0.896 0.88945455 0.89454545 0.89818182 0.90254545
|
|
0.89309091 0.89890909 0.90552326 0.89607558]
|
|
|
|
mean value: 0.8974689746300212
|
|
|
|
key: test_fscore
|
|
value: [0.85714286 0.87741935 0.88311688 0.85542169 0.85185185 0.84023669
|
|
0.87573964 0.9044586 0.83333333 0.88343558]
|
|
|
|
mean value: 0.866215647993977
|
|
|
|
key: train_fscore
|
|
value: [0.90545204 0.9009009 0.8954608 0.89993099 0.90331492 0.90642458
|
|
0.8981289 0.90353921 0.90959666 0.90076336]
|
|
|
|
mean value: 0.9023512347427903
|
|
|
|
key: test_precision
|
|
value: [0.81176471 0.86075949 0.87179487 0.78888889 0.81176471 0.77173913
|
|
0.80434783 0.8875 0.76086957 0.82758621]
|
|
|
|
mean value: 0.8197015394755035
|
|
|
|
key: train_precision
|
|
value: [0.86202365 0.86092715 0.84986945 0.85676741 0.85939553 0.87114094
|
|
0.85714286 0.86339523 0.872 0.86188579]
|
|
|
|
mean value: 0.8614548012975458
|
|
|
|
key: test_recall
|
|
value: [0.90789474 0.89473684 0.89473684 0.93421053 0.8961039 0.92207792
|
|
0.96103896 0.92207792 0.92105263 0.94736842]
|
|
|
|
mean value: 0.9201298701298701
|
|
|
|
key: train_recall
|
|
value: [0.95348837 0.94476744 0.94622093 0.94767442 0.95196507 0.94468705
|
|
0.94323144 0.94759825 0.9505814 0.94331395]
|
|
|
|
mean value: 0.9473528316576961
|
|
|
|
key: test_roc_auc
|
|
value: [0.85005126 0.87593985 0.88243336 0.84372864 0.84278879 0.82288107
|
|
0.86209843 0.90182843 0.81578947 0.875 ]
|
|
|
|
mean value: 0.857253930280246
|
|
|
|
key: train_roc_auc
|
|
value: [0.90032497 0.89596451 0.88941323 0.89450679 0.8982209 0.90257608
|
|
0.89312735 0.89894448 0.90552326 0.89607558]
|
|
|
|
mean value: 0.8974677143630886
|
|
|
|
key: test_jcc
|
|
value: [0.75 0.7816092 0.79069767 0.74736842 0.74193548 0.7244898
|
|
0.77894737 0.8255814 0.71428571 0.79120879]
|
|
|
|
mean value: 0.7646123839927266
|
|
|
|
key: train_jcc
|
|
value: [0.82723834 0.81967213 0.81070984 0.81806775 0.82367758 0.82886335
|
|
0.81509434 0.82405063 0.83418367 0.81944444]
|
|
|
|
mean value: 0.822100207718406
|
|
|
|
MCC on Blind test: 0.29
|
|
|
|
Accuracy on Blind test: 0.7
|
|
|
|
Model_name: SVM
|
|
Model func: SVC(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'mcsm_ppi2_affinity',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=169)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SVC(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.09896469 0.08270192 0.08094859 0.0975244 0.10141873 0.08790708
|
|
0.10121083 0.10040784 0.09751797 0.09295511]
|
|
|
|
mean value: 0.09415571689605713
|
|
|
|
key: score_time
|
|
value: [0.02552009 0.02494621 0.02546787 0.02582455 0.02723217 0.02690339
|
|
0.02615547 0.0273757 0.02689338 0.02538037]
|
|
|
|
mean value: 0.02616991996765137
|
|
|
|
key: test_mcc
|
|
value: [0.66849513 0.81960182 0.76906795 0.74806111 0.77405747 0.75929586
|
|
0.80055798 0.76875613 0.69989647 0.73914915]
|
|
|
|
mean value: 0.7546939068219758
|
|
|
|
key: train_mcc
|
|
value: [0.79942667 0.79410513 0.79385357 0.79671413 0.7971206 0.79125472
|
|
0.78407182 0.78468743 0.7958287 0.7906873 ]
|
|
|
|
mean value: 0.7927750073287233
|
|
|
|
key: test_accuracy
|
|
value: [0.82352941 0.90849673 0.88235294 0.86928105 0.88235294 0.87581699
|
|
0.89542484 0.88235294 0.84210526 0.86842105]
|
|
|
|
mean value: 0.8730134158926729
|
|
|
|
key: train_accuracy
|
|
value: [0.89672727 0.89309091 0.89381818 0.89454545 0.89527273 0.89236364
|
|
0.88872727 0.88872727 0.89534884 0.89171512]
|
|
|
|
mean value: 0.89303366807611
|
|
|
|
key: test_fscore
|
|
value: [0.84210526 0.91139241 0.8875 0.87804878 0.89156627 0.88484848
|
|
0.90361446 0.88888889 0.85714286 0.87341772]
|
|
|
|
mean value: 0.8818525123999775
|
|
|
|
key: train_fscore
|
|
value: [0.90273973 0.90020367 0.9001368 0.9014276 0.90150479 0.89876881
|
|
0.89542037 0.89570552 0.90096286 0.89857046]
|
|
|
|
mean value: 0.8995440596746382
|
|
|
|
key: test_precision
|
|
value: [0.75789474 0.87804878 0.8452381 0.81818182 0.83146067 0.82954545
|
|
0.84269663 0.84705882 0.7826087 0.84146341]
|
|
|
|
mean value: 0.8274197122481797
|
|
|
|
key: train_precision
|
|
value: [0.85362694 0.84458599 0.8501292 0.8467433 0.85032258 0.84774194
|
|
0.84407216 0.84230769 0.85509138 0.84507042]
|
|
|
|
mean value: 0.8479691603984293
|
|
|
|
key: test_recall
|
|
value: [0.94736842 0.94736842 0.93421053 0.94736842 0.96103896 0.94805195
|
|
0.97402597 0.93506494 0.94736842 0.90789474]
|
|
|
|
mean value: 0.9449760765550239
|
|
|
|
key: train_recall
|
|
value: [0.95784884 0.96366279 0.95639535 0.96366279 0.95924309 0.95633188
|
|
0.95342067 0.95633188 0.95203488 0.95930233]
|
|
|
|
mean value: 0.9578234487661217
|
|
|
|
key: test_roc_auc
|
|
value: [0.82433356 0.90874915 0.88268968 0.86978811 0.88183527 0.87534176
|
|
0.89490772 0.88200615 0.84210526 0.86842105]
|
|
|
|
mean value: 0.8730177717019822
|
|
|
|
key: train_roc_auc
|
|
value: [0.89668279 0.89303955 0.89377264 0.89449515 0.89531922 0.89241012
|
|
0.88877429 0.8887764 0.89534884 0.89171512]
|
|
|
|
mean value: 0.8930334111912258
|
|
|
|
key: test_jcc
|
|
value: [0.72727273 0.8372093 0.79775281 0.7826087 0.80434783 0.79347826
|
|
0.82417582 0.8 0.75 0.7752809 ]
|
|
|
|
mean value: 0.7892126344247997
|
|
|
|
key: train_jcc
|
|
value: [0.8227216 0.81851852 0.81840796 0.82054455 0.82067248 0.81614907
|
|
0.81064356 0.81111111 0.81977472 0.815822 ]
|
|
|
|
mean value: 0.8174365574042904
|
|
|
|
MCC on Blind test: 0.66
|
|
|
|
Accuracy on Blind test: 0.85
|
|
|
|
Model_name: MLP
|
|
Model func: MLPClassifier(max_iter=500, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'mcsm_ppi2_affinity',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=169)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MLPClassifier(max_iter=500, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [3.21908903 4.14850903 2.85513258 4.49352384 4.32316732 3.60413384
|
|
1.70084286 3.15354061 4.29196239 2.52911472]
|
|
|
|
mean value: 3.4319016218185423
|
|
|
|
key: score_time
|
|
value: [0.01373982 0.01354122 0.01364374 0.01311398 0.01310658 0.01321745
|
|
0.01310897 0.01313305 0.01305342 0.0131073 ]
|
|
|
|
mean value: 0.0132765531539917
|
|
|
|
key: test_mcc
|
|
value: [0.76582319 0.84423266 0.80458209 0.8319081 0.83014354 0.84339214
|
|
0.83176564 0.86959495 0.80831934 0.77692131]
|
|
|
|
mean value: 0.8206682970235742
|
|
|
|
key: train_mcc
|
|
value: [0.92730754 0.93215647 0.91868434 0.94065016 0.96072823 0.94492237
|
|
0.88409176 0.93019723 0.94857664 0.90728731]
|
|
|
|
mean value: 0.929460205729579
|
|
|
|
key: test_accuracy
|
|
value: [0.88235294 0.92156863 0.90196078 0.91503268 0.91503268 0.92156863
|
|
0.91503268 0.93464052 0.90131579 0.88815789]
|
|
|
|
mean value: 0.9096663226694186
|
|
|
|
key: train_accuracy
|
|
value: [0.96363636 0.96581818 0.95927273 0.97018182 0.98036364 0.97236364
|
|
0.94181818 0.96509091 0.97383721 0.95348837]
|
|
|
|
mean value: 0.9645871035940803
|
|
|
|
key: test_fscore
|
|
value: [0.88461538 0.91891892 0.90322581 0.91719745 0.91503268 0.92307692
|
|
0.91823899 0.93421053 0.9068323 0.88590604]
|
|
|
|
mean value: 0.9107255023462285
|
|
|
|
key: train_fscore
|
|
value: [0.96382055 0.96640457 0.95965418 0.97056712 0.98033503 0.97205882
|
|
0.94269341 0.9649635 0.97439545 0.95287187]
|
|
|
|
mean value: 0.9647764512792509
|
|
|
|
key: test_precision
|
|
value: [0.8625 0.94444444 0.88607595 0.88888889 0.92105263 0.91139241
|
|
0.8902439 0.94666667 0.85882353 0.90410959]
|
|
|
|
mean value: 0.9014198006901212
|
|
|
|
key: train_precision
|
|
value: [0.95965418 0.95077356 0.95142857 0.95886525 0.98104956 0.98216939
|
|
0.9280677 0.96778917 0.954039 0.96567164]
|
|
|
|
mean value: 0.9599508015607497
|
|
|
|
key: test_recall
|
|
value: [0.90789474 0.89473684 0.92105263 0.94736842 0.90909091 0.93506494
|
|
0.94805195 0.92207792 0.96052632 0.86842105]
|
|
|
|
mean value: 0.9214285714285714
|
|
|
|
key: train_recall
|
|
value: [0.96802326 0.98255814 0.96802326 0.98255814 0.97962154 0.96215429
|
|
0.95778748 0.96215429 0.99563953 0.94040698]
|
|
|
|
mean value: 0.9698926915134898
|
|
|
|
key: test_roc_auc
|
|
value: [0.8825188 0.9213944 0.90208476 0.91524265 0.91507177 0.92147984
|
|
0.91481545 0.93472317 0.90131579 0.88815789]
|
|
|
|
mean value: 0.9096804511278196
|
|
|
|
key: train_roc_auc
|
|
value: [0.96363317 0.965806 0.95926636 0.97017281 0.9803631 0.97235622
|
|
0.94182979 0.96508877 0.97383721 0.95348837]
|
|
|
|
mean value: 0.9645841796147727
|
|
|
|
key: test_jcc
|
|
value: [0.79310345 0.85 0.82352941 0.84705882 0.84337349 0.85714286
|
|
0.84883721 0.87654321 0.82954545 0.79518072]
|
|
|
|
mean value: 0.836431463130463
|
|
|
|
key: train_jcc
|
|
value: [0.9301676 0.93499308 0.92243767 0.94281729 0.96142857 0.94563662
|
|
0.89159892 0.93229901 0.95006935 0.90998594]
|
|
|
|
mean value: 0.9321434056837834
|
|
|
|
MCC on Blind test: 0.49
|
|
|
|
Accuracy on Blind test: 0.8
|
|
|
|
Model_name: Decision Tree
|
|
Model func: DecisionTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'mcsm_ppi2_affinity',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=169)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', DecisionTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.09929824 0.10888624 0.09415102 0.07507944 0.10783005 0.10993171
|
|
0.08492923 0.09847975 0.10214543 0.09533954]
|
|
|
|
mean value: 0.09760706424713135
|
|
|
|
key: score_time
|
|
value: [0.00966668 0.00943613 0.00986648 0.00968099 0.01005101 0.00963473
|
|
0.01003242 0.00983167 0.00968599 0.0097363 ]
|
|
|
|
mean value: 0.009762239456176759
|
|
|
|
key: test_mcc
|
|
value: [0.76491718 0.83014354 0.699419 0.81698565 0.84313055 0.80555346
|
|
0.8181667 0.88299739 0.8553372 0.80325763]
|
|
|
|
mean value: 0.8119908296854097
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.88235294 0.91503268 0.8496732 0.90849673 0.92156863 0.90196078
|
|
0.90849673 0.94117647 0.92763158 0.90131579]
|
|
|
|
mean value: 0.9057705538355693
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.88 0.91503268 0.8496732 0.90789474 0.92207792 0.90566038
|
|
0.90666667 0.94267516 0.92715232 0.90322581]
|
|
|
|
mean value: 0.9060058868866202
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.89189189 0.90909091 0.84415584 0.90789474 0.92207792 0.87804878
|
|
0.93150685 0.925 0.93333333 0.88607595]
|
|
|
|
mean value: 0.9029076216561968
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.86842105 0.92105263 0.85526316 0.90789474 0.92207792 0.93506494
|
|
0.88311688 0.96103896 0.92105263 0.92105263]
|
|
|
|
mean value: 0.9096035543403964
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.88226247 0.91507177 0.8497095 0.90849282 0.92156528 0.90174299
|
|
0.9086637 0.9410458 0.92763158 0.90131579]
|
|
|
|
mean value: 0.9057501708817498
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.78571429 0.84337349 0.73863636 0.8313253 0.85542169 0.82758621
|
|
0.82926829 0.89156627 0.86419753 0.82352941]
|
|
|
|
mean value: 0.8290618838546983
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.62
|
|
|
|
Accuracy on Blind test: 0.85
|
|
|
|
Model_name: Extra Trees
|
|
Model func: ExtraTreesClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'mcsm_ppi2_affinity',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=169)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreesClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.22867084 0.23072553 0.22455144 0.22490263 0.2397933 0.22511888
|
|
0.22980976 0.22913957 0.22906566 0.22833276]
|
|
|
|
mean value: 0.22901103496551514
|
|
|
|
key: score_time
|
|
value: [0.02199197 0.02186108 0.02042413 0.02026534 0.0211246 0.02018738
|
|
0.02022576 0.02108979 0.02015829 0.020473 ]
|
|
|
|
mean value: 0.020780134201049804
|
|
|
|
key: test_mcc
|
|
value: [0.78117468 0.84313055 0.84344558 0.845814 0.79084074 0.82137062
|
|
0.84423266 0.84767786 0.78181707 0.84327404]
|
|
|
|
mean value: 0.8242777798688283
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.88888889 0.92156863 0.92156863 0.92156863 0.89542484 0.90849673
|
|
0.92156863 0.92156863 0.88815789 0.92105263]
|
|
|
|
mean value: 0.9109864121087031
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.89308176 0.92105263 0.92207792 0.92405063 0.8961039 0.91358025
|
|
0.92405063 0.92592593 0.89440994 0.92307692]
|
|
|
|
mean value: 0.9137410510394468
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.85542169 0.92105263 0.91025641 0.8902439 0.8961039 0.87058824
|
|
0.90123457 0.88235294 0.84705882 0.9 ]
|
|
|
|
mean value: 0.88743130950265
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.93421053 0.92105263 0.93421053 0.96052632 0.8961039 0.96103896
|
|
0.94805195 0.97402597 0.94736842 0.94736842]
|
|
|
|
mean value: 0.9423957621326042
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.88918319 0.92156528 0.92165072 0.9218216 0.89542037 0.90815106
|
|
0.9213944 0.92122351 0.88815789 0.92105263]
|
|
|
|
mean value: 0.9109620642515379
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.80681818 0.85365854 0.85542169 0.85882353 0.81176471 0.84090909
|
|
0.85882353 0.86206897 0.80898876 0.85714286]
|
|
|
|
mean value: 0.8414419847470551
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.5
|
|
|
|
Accuracy on Blind test: 0.8
|
|
|
|
Model_name: Extra Tree
|
|
Model func: ExtraTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'mcsm_ppi2_affinity',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=169)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01608753 0.01561308 0.01475263 0.01487994 0.01497626 0.01502037
|
|
0.01454926 0.01472878 0.01462555 0.01473379]
|
|
|
|
mean value: 0.014996719360351563
|
|
|
|
key: score_time
|
|
value: [0.01048088 0.00959992 0.00956202 0.00969195 0.0096581 0.00960183
|
|
0.00958061 0.00963783 0.00949526 0.0095489 ]
|
|
|
|
mean value: 0.009685730934143067
|
|
|
|
key: test_mcc
|
|
value: [0.59922056 0.71446184 0.63397129 0.66232872 0.68822261 0.71240602
|
|
0.68654932 0.66090477 0.59339083 0.59215653]
|
|
|
|
mean value: 0.6543612482521513
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.79738562 0.85620915 0.81699346 0.83006536 0.84313725 0.85620915
|
|
0.84313725 0.83006536 0.79605263 0.79605263]
|
|
|
|
mean value: 0.8265307877536979
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.80745342 0.84931507 0.81578947 0.83544304 0.85 0.85714286
|
|
0.84210526 0.83544304 0.80254777 0.79470199]
|
|
|
|
mean value: 0.8289941912032153
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.76470588 0.88571429 0.81578947 0.80487805 0.81927711 0.85714286
|
|
0.85333333 0.81481481 0.77777778 0.8 ]
|
|
|
|
mean value: 0.8193433582034443
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.85526316 0.81578947 0.81578947 0.86842105 0.88311688 0.85714286
|
|
0.83116883 0.85714286 0.82894737 0.78947368]
|
|
|
|
mean value: 0.8402255639097744
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.79776145 0.85594668 0.81698565 0.83031442 0.84287423 0.85620301
|
|
0.84321599 0.82988722 0.79605263 0.79605263]
|
|
|
|
mean value: 0.8265293916609706
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.67708333 0.73809524 0.68888889 0.7173913 0.73913043 0.75
|
|
0.72727273 0.7173913 0.67021277 0.65934066]
|
|
|
|
mean value: 0.7084806656366555
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.27
|
|
|
|
Accuracy on Blind test: 0.71
|
|
|
|
Model_name: Random Forest
|
|
Model func: RandomForestClassifier(n_estimators=1000, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'mcsm_ppi2_affinity',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=169)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(n_estimators=1000, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [4.49464321 4.38093638 4.30495667 4.36050797 4.34949279 4.30632448
|
|
4.35694075 4.38662696 4.37167072 4.35131645]
|
|
|
|
mean value: 4.366341638565063
|
|
|
|
key: score_time
|
|
value: [0.11497831 0.10632992 0.10539389 0.10504007 0.10512376 0.10537744
|
|
0.108845 0.10902834 0.10820317 0.10568619]
|
|
|
|
mean value: 0.10740060806274414
|
|
|
|
key: test_mcc
|
|
value: [0.8580978 0.90856265 0.85628845 0.92156528 0.90921537 0.89813651
|
|
0.93471203 0.88299739 0.8732726 0.8553372 ]
|
|
|
|
mean value: 0.8898185275108337
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.92810458 0.95424837 0.92810458 0.96078431 0.95424837 0.94771242
|
|
0.96732026 0.94117647 0.93421053 0.92763158]
|
|
|
|
mean value: 0.9443541451668387
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.92993631 0.95364238 0.92810458 0.96052632 0.95364238 0.95
|
|
0.96774194 0.94267516 0.9375 0.92810458]
|
|
|
|
mean value: 0.9451873634780216
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.90123457 0.96 0.92207792 0.96052632 0.97297297 0.91566265
|
|
0.96153846 0.925 0.89285714 0.92207792]
|
|
|
|
mean value: 0.933394795581754
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.96052632 0.94736842 0.93421053 0.96052632 0.93506494 0.98701299
|
|
0.97402597 0.96103896 0.98684211 0.93421053]
|
|
|
|
mean value: 0.9580827067669173
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.92831511 0.95420369 0.92814422 0.96078264 0.95437457 0.94745386
|
|
0.96727614 0.9410458 0.93421053 0.92763158]
|
|
|
|
mean value: 0.9443438140806562
|
|
|
|
key: train_roc_auc
|
|
value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.86904762 0.91139241 0.86585366 0.92405063 0.91139241 0.9047619
|
|
0.9375 0.89156627 0.88235294 0.86585366]
|
|
|
|
mean value: 0.8963771490157381
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.69
|
|
|
|
Accuracy on Blind test: 0.87
|
|
|
|
Model_name: Random Forest2
|
|
Model func: RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'mcsm_ppi2_affinity',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC0...05', 'ZHAC000106'],
|
|
dtype='object', length=169)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10,
|
|
oob_score=True, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.41917253 1.4527061 1.52037549 1.44638467 1.48357606 1.44732165
|
|
1.43531227 1.51390028 1.49224877 1.45330882]
|
|
|
|
mean value: 1.4664306640625
|
|
|
|
key: score_time
|
|
value: [0.26775193 0.3004539 0.30319285 0.28838444 0.29361367 0.3063159
|
|
0.31030941 0.24813151 0.33361793 0.23446774]
|
|
|
|
mean value: 0.2886239290237427
|
|
|
|
key: test_mcc
|
|
value: [0.8580978 0.89542037 0.84313055 0.89574433 0.89574433 0.89813651
|
|
0.90857826 0.8562639 0.83074716 0.84210526]
|
|
|
|
mean value: 0.8723968474971242
|
|
|
|
key: train_mcc
|
|
value: [0.95365159 0.94789219 0.94931606 0.94938027 0.94207429 0.95365295
|
|
0.9492613 0.94778138 0.94509095 0.94650992]
|
|
|
|
mean value: 0.9484610899058289
|
|
|
|
key: test_accuracy
|
|
value: [0.92810458 0.94771242 0.92156863 0.94771242 0.94771242 0.94771242
|
|
0.95424837 0.92810458 0.91447368 0.92105263]
|
|
|
|
mean value: 0.9358402132782938
|
|
|
|
key: train_accuracy
|
|
value: [0.97672727 0.97381818 0.97454545 0.97454545 0.97090909 0.97672727
|
|
0.97454545 0.97381818 0.97238372 0.97311047]
|
|
|
|
mean value: 0.9741130549682875
|
|
|
|
key: test_fscore
|
|
value: [0.92993631 0.94736842 0.92105263 0.94805195 0.94736842 0.95
|
|
0.95424837 0.92903226 0.91719745 0.92105263]
|
|
|
|
mean value: 0.9365308435354477
|
|
|
|
key: train_fscore
|
|
value: [0.97697842 0.97413793 0.97483825 0.97487437 0.97122302 0.97694524
|
|
0.97476568 0.97402597 0.97274032 0.97343862]
|
|
|
|
mean value: 0.9743967825236611
|
|
|
|
key: test_precision
|
|
value: [0.90123457 0.94736842 0.92105263 0.93589744 0.96 0.91566265
|
|
0.96052632 0.92307692 0.88888889 0.92105263]
|
|
|
|
mean value: 0.9274760466366891
|
|
|
|
key: train_precision
|
|
value: [0.96723647 0.96306818 0.96443812 0.96312057 0.9601707 0.96718973
|
|
0.96571429 0.96566524 0.96033994 0.96170213]
|
|
|
|
mean value: 0.9638645357502966
|
|
|
|
key: test_recall
|
|
value: [0.96052632 0.94736842 0.92105263 0.96052632 0.93506494 0.98701299
|
|
0.94805195 0.93506494 0.94736842 0.92105263]
|
|
|
|
mean value: 0.946308954203691
|
|
|
|
key: train_recall
|
|
value: [0.9869186 0.98546512 0.98546512 0.9869186 0.98253275 0.98689956
|
|
0.98398836 0.98253275 0.98546512 0.98546512]
|
|
|
|
mean value: 0.9851651095088182
|
|
|
|
key: test_roc_auc
|
|
value: [0.92831511 0.94771018 0.92156528 0.94779563 0.94779563 0.94745386
|
|
0.95428913 0.92805878 0.91447368 0.92105263]
|
|
|
|
mean value: 0.9358509911141489
|
|
|
|
key: train_roc_auc
|
|
value: [0.97671986 0.97380971 0.97453751 0.97453645 0.97091754 0.97673467
|
|
0.97455232 0.97382452 0.97238372 0.97311047]
|
|
|
|
mean value: 0.9741126739108358
|
|
|
|
key: test_jcc
|
|
value: [0.86904762 0.9 0.85365854 0.90123457 0.9 0.9047619
|
|
0.9125 0.86746988 0.84705882 0.85365854]
|
|
|
|
mean value: 0.8809389867928974
|
|
|
|
key: train_jcc
|
|
value: [0.95499297 0.94957983 0.95091164 0.95098039 0.94405594 0.95492958
|
|
0.95077356 0.94936709 0.94692737 0.94825175]
|
|
|
|
mean value: 0.9500770123744795
|
|
|
|
MCC on Blind test: 0.67
|
|
|
|
Accuracy on Blind test: 0.86
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'mcsm_ppi2_affinity',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=169)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.03081012 0.01998234 0.01959682 0.01960373 0.01962972 0.01948071
|
|
0.01957202 0.01955676 0.01923323 0.01917768]
|
|
|
|
mean value: 0.020664310455322264
|
|
|
|
key: score_time
|
|
value: [0.01384521 0.01289678 0.07722211 0.01287508 0.01287889 0.01301694
|
|
0.01292157 0.01290417 0.01277256 0.01296306]
|
|
|
|
mean value: 0.019429636001586915
|
|
|
|
key: test_mcc
|
|
value: [0.5962008 0.62329017 0.63496441 0.73075678 0.71260054 0.63425056
|
|
0.6601162 0.67327409 0.60715823 0.69791237]
|
|
|
|
mean value: 0.657052415810285
|
|
|
|
key: train_mcc
|
|
value: [0.68630036 0.66997226 0.68009938 0.66706243 0.671509 0.66729052
|
|
0.68029451 0.68166853 0.69934143 0.67631874]
|
|
|
|
mean value: 0.6779857153012689
|
|
|
|
key: test_accuracy
|
|
value: [0.79738562 0.81045752 0.81699346 0.8627451 0.85620915 0.81699346
|
|
0.83006536 0.83660131 0.80263158 0.84868421]
|
|
|
|
mean value: 0.8278766769865842
|
|
|
|
key: train_accuracy
|
|
value: [0.84290909 0.83490909 0.84 0.83345455 0.83563636 0.83345455
|
|
0.84 0.84072727 0.84956395 0.83793605]
|
|
|
|
mean value: 0.8388590909090909
|
|
|
|
key: test_fscore
|
|
value: [0.80254777 0.8 0.82051282 0.86956522 0.85897436 0.81578947
|
|
0.83116883 0.83660131 0.81012658 0.84563758]
|
|
|
|
mean value: 0.8290923945792803
|
|
|
|
key: train_fscore
|
|
value: [0.84593438 0.83680805 0.84149856 0.83537024 0.83764368 0.83607731
|
|
0.84218077 0.84255931 0.85139986 0.84082798]
|
|
|
|
mean value: 0.841030013525283
|
|
|
|
key: test_precision
|
|
value: [0.77777778 0.84057971 0.8 0.82352941 0.84810127 0.82666667
|
|
0.83116883 0.84210526 0.7804878 0.8630137 ]
|
|
|
|
mean value: 0.8233430430011774
|
|
|
|
key: train_precision
|
|
value: [0.83053221 0.82788051 0.83428571 0.82645804 0.82695035 0.82253521
|
|
0.83026874 0.83238636 0.84113475 0.82608696]
|
|
|
|
mean value: 0.8298518855214777
|
|
|
|
key: test_recall
|
|
value: [0.82894737 0.76315789 0.84210526 0.92105263 0.87012987 0.80519481
|
|
0.83116883 0.83116883 0.84210526 0.82894737]
|
|
|
|
mean value: 0.8363978127136023
|
|
|
|
key: train_recall
|
|
value: [0.8619186 0.84593023 0.84883721 0.84447674 0.84861718 0.85007278
|
|
0.85443959 0.85298399 0.8619186 0.85610465]
|
|
|
|
mean value: 0.8525299583629532
|
|
|
|
key: test_roc_auc
|
|
value: [0.79759057 0.81015038 0.81715653 0.86312372 0.85611757 0.81707109
|
|
0.8300581 0.83663705 0.80263158 0.84868421]
|
|
|
|
mean value: 0.827922077922078
|
|
|
|
key: train_roc_auc
|
|
value: [0.84289526 0.83490107 0.83999357 0.83344652 0.8356458 0.83346662
|
|
0.84001049 0.84073618 0.84956395 0.83793605]
|
|
|
|
mean value: 0.8388595511323246
|
|
|
|
key: test_jcc
|
|
value: [0.67021277 0.66666667 0.69565217 0.76923077 0.75280899 0.68888889
|
|
0.71111111 0.71910112 0.68085106 0.73255814]
|
|
|
|
mean value: 0.7087081691492148
|
|
|
|
key: train_jcc
|
|
value: [0.73300371 0.71940667 0.72636816 0.71728395 0.72064277 0.71832718
|
|
0.72738538 0.72795031 0.74125 0.72536946]
|
|
|
|
mean value: 0.7256987591762736
|
|
|
|
MCC on Blind test: 0.44
|
|
|
|
Accuracy on Blind test: 0.77
|
|
|
|
Model_name: XGBoost
|
|
Model func: XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'mcsm_ppi2_affinity',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC0...
|
|
interaction_constraints=None, learning_rate=None,
|
|
max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan,
|
|
monotone_constraints=None, n_estimators=100,
|
|
n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None,
|
|
reg_lambda=None, scale_pos_weight=None,
|
|
subsample=None, tree_method=None,
|
|
use_label_encoder=False,
|
|
validate_parameters=None, verbosity=0))])
|
|
|
|
key: fit_time
|
|
value: [0.21651745 0.21218777 0.22188902 0.21554422 0.21811533 0.37120724
|
|
0.20402408 0.26554132 0.83235073 0.2088933 ]
|
|
|
|
mean value: 0.2966270446777344
|
|
|
|
key: score_time
|
|
value: [0.01165318 0.0117445 0.01191878 0.01185989 0.01184273 0.01159692
|
|
0.01163721 0.01233768 0.01167297 0.01216745]
|
|
|
|
mean value: 0.011843132972717284
|
|
|
|
key: test_mcc
|
|
value: [0.84433845 0.89542037 0.88305705 0.89542037 0.89574433 0.87189727
|
|
0.92186711 0.89570879 0.92105263 0.88226658]
|
|
|
|
mean value: 0.8906772940665643
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.92156863 0.94771242 0.94117647 0.94771242 0.94771242 0.93464052
|
|
0.96078431 0.94771242 0.96052632 0.94078947]
|
|
|
|
mean value: 0.9450335397316821
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.92307692 0.94736842 0.94193548 0.94736842 0.94736842 0.9375
|
|
0.96153846 0.94871795 0.96052632 0.94193548]
|
|
|
|
mean value: 0.9457335880022637
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.9 0.94736842 0.92405063 0.94736842 0.96 0.90361446
|
|
0.94936709 0.93670886 0.96052632 0.92405063]
|
|
|
|
mean value: 0.9353054830915936
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.94736842 0.94736842 0.96052632 0.94736842 0.93506494 0.97402597
|
|
0.97402597 0.96103896 0.96052632 0.96052632]
|
|
|
|
mean value: 0.956784005468216
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.92173616 0.94771018 0.94130212 0.94771018 0.94779563 0.93438141
|
|
0.9606972 0.94762474 0.96052632 0.94078947]
|
|
|
|
mean value: 0.9450273410799727
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.85714286 0.9 0.8902439 0.9 0.9 0.88235294
|
|
0.92592593 0.90243902 0.92405063 0.8902439 ]
|
|
|
|
mean value: 0.8972399186424939
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.63
|
|
|
|
Accuracy on Blind test: 0.85
|
|
|
|
Model_name: LDA
|
|
Model func: LinearDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'mcsm_ppi2_affinity',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=169)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LinearDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.06418848 0.10798907 0.07780838 0.10901904 0.07653379 0.07374358
|
|
0.09516168 0.10582447 0.08670592 0.0995965 ]
|
|
|
|
mean value: 0.08965709209442138
|
|
|
|
key: score_time
|
|
value: [0.01282477 0.01284122 0.01277304 0.01288509 0.01299357 0.01277947
|
|
0.01987886 0.01280427 0.01283216 0.01978779]
|
|
|
|
mean value: 0.014240026473999023
|
|
|
|
key: test_mcc
|
|
value: [0.75152581 0.79114682 0.7286962 0.81960182 0.783044 0.72432484
|
|
0.77934127 0.80718653 0.71549794 0.83074716]
|
|
|
|
mean value: 0.773111239139819
|
|
|
|
key: train_mcc
|
|
value: [0.84320747 0.82819649 0.84843079 0.83061833 0.83269488 0.82957767
|
|
0.84322703 0.8227867 0.84097653 0.84020023]
|
|
|
|
mean value: 0.8359916130741504
|
|
|
|
key: test_accuracy
|
|
value: [0.86928105 0.89542484 0.8627451 0.90849673 0.88888889 0.85620915
|
|
0.88888889 0.90196078 0.84868421 0.91447368]
|
|
|
|
mean value: 0.8835053319573444
|
|
|
|
key: train_accuracy
|
|
value: [0.92 0.91272727 0.92290909 0.91345455 0.91490909 0.91345455
|
|
0.92 0.90981818 0.9193314 0.91860465]
|
|
|
|
mean value: 0.9165208773784355
|
|
|
|
key: test_fscore
|
|
value: [0.87951807 0.8961039 0.86792453 0.91139241 0.89570552 0.86904762
|
|
0.89308176 0.9068323 0.86390533 0.91719745]
|
|
|
|
mean value: 0.8900708879094263
|
|
|
|
key: train_fscore
|
|
value: [0.92339833 0.91620112 0.92587413 0.91741846 0.91823899 0.91672498
|
|
0.92329149 0.91352859 0.92221444 0.92189679]
|
|
|
|
mean value: 0.9198787319253657
|
|
|
|
key: test_precision
|
|
value: [0.81111111 0.88461538 0.8313253 0.87804878 0.84883721 0.8021978
|
|
0.86585366 0.86904762 0.78494624 0.88888889]
|
|
|
|
mean value: 0.846487199195148
|
|
|
|
key: train_precision
|
|
value: [0.88636364 0.88172043 0.89218329 0.87782205 0.88306452 0.88274933
|
|
0.88621151 0.8768407 0.89039242 0.88605898]
|
|
|
|
mean value: 0.8843406854568912
|
|
|
|
key: test_recall
|
|
value: [0.96052632 0.90789474 0.90789474 0.94736842 0.94805195 0.94805195
|
|
0.92207792 0.94805195 0.96052632 0.94736842]
|
|
|
|
mean value: 0.9397812713602187
|
|
|
|
key: train_recall
|
|
value: [0.96366279 0.95348837 0.9622093 0.96075581 0.95633188 0.95342067
|
|
0.9636099 0.95342067 0.95639535 0.96075581]
|
|
|
|
mean value: 0.9584050556853188
|
|
|
|
key: test_roc_auc
|
|
value: [0.86987355 0.89550581 0.86303828 0.90874915 0.88849966 0.85560492
|
|
0.88867054 0.90165755 0.84868421 0.91447368]
|
|
|
|
mean value: 0.8834757347915243
|
|
|
|
key: train_roc_auc
|
|
value: [0.91996822 0.91269761 0.92288049 0.91342012 0.91493919 0.91348359
|
|
0.92003169 0.90984987 0.9193314 0.91860465]
|
|
|
|
mean value: 0.9165206831183779
|
|
|
|
key: test_jcc
|
|
value: [0.78494624 0.81176471 0.76666667 0.8372093 0.81111111 0.76842105
|
|
0.80681818 0.82954545 0.76041667 0.84705882]
|
|
|
|
mean value: 0.8023958201736145
|
|
|
|
key: train_jcc
|
|
value: [0.85769728 0.84536082 0.86197917 0.8474359 0.84883721 0.84625323
|
|
0.85751295 0.84082157 0.8556567 0.85510996]
|
|
|
|
mean value: 0.8516664789110635
|
|
|
|
MCC on Blind test: 0.55
|
|
|
|
Accuracy on Blind test: 0.81
|
|
|
|
Model_name: Multinomial
|
|
Model func: MultinomialNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'mcsm_ppi2_affinity',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=169)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MultinomialNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01832891 0.02825451 0.01833391 0.01843882 0.01830125 0.01842666
|
|
0.01840496 0.01851463 0.01794028 0.0179925 ]
|
|
|
|
mean value: 0.01929364204406738
|
|
|
|
key: score_time
|
|
value: [0.01265311 0.01276994 0.01266003 0.01267457 0.01258183 0.01259828
|
|
0.01256871 0.01260448 0.01257634 0.01261044]
|
|
|
|
mean value: 0.01262977123260498
|
|
|
|
key: test_mcc
|
|
value: [0.54382141 0.6127583 0.6602839 0.63496441 0.60796725 0.6602839
|
|
0.60810119 0.64706889 0.5797509 0.64613668]
|
|
|
|
mean value: 0.6201136826518324
|
|
|
|
key: train_mcc
|
|
value: [0.63785374 0.63054715 0.61891231 0.62473051 0.63346352 0.62183926
|
|
0.62038185 0.6261962 0.63811442 0.63081395]
|
|
|
|
mean value: 0.6282852926006686
|
|
|
|
key: test_accuracy
|
|
value: [0.77124183 0.80392157 0.83006536 0.81699346 0.80392157 0.83006536
|
|
0.80392157 0.82352941 0.78947368 0.82236842]
|
|
|
|
mean value: 0.8095502235982113
|
|
|
|
key: train_accuracy
|
|
value: [0.81890909 0.81527273 0.80945455 0.81236364 0.81672727 0.81090909
|
|
0.81018182 0.81309091 0.8190407 0.81540698]
|
|
|
|
mean value: 0.8141356765327695
|
|
|
|
key: test_fscore
|
|
value: [0.77707006 0.78873239 0.82666667 0.82051282 0.80769231 0.83333333
|
|
0.80263158 0.82580645 0.79487179 0.81632653]
|
|
|
|
mean value: 0.8093643942309905
|
|
|
|
key: train_fscore
|
|
value: [0.81811541 0.81567489 0.80931587 0.81222707 0.81605839 0.80994152
|
|
0.81073241 0.81227173 0.81811541 0.81540698]
|
|
|
|
mean value: 0.8137859694243422
|
|
|
|
key: test_precision
|
|
value: [0.75308642 0.84848485 0.83783784 0.8 0.79746835 0.82278481
|
|
0.81333333 0.82051282 0.775 0.84507042]
|
|
|
|
mean value: 0.81135788470141
|
|
|
|
key: train_precision
|
|
value: [0.82232012 0.81449275 0.81049563 0.81341108 0.81844802 0.81350954
|
|
0.80780347 0.81524927 0.82232012 0.81540698]
|
|
|
|
mean value: 0.815345697413874
|
|
|
|
key: test_recall
|
|
value: [0.80263158 0.73684211 0.81578947 0.84210526 0.81818182 0.84415584
|
|
0.79220779 0.83116883 0.81578947 0.78947368]
|
|
|
|
mean value: 0.8088345864661655
|
|
|
|
key: train_recall
|
|
value: [0.81395349 0.81686047 0.80813953 0.81104651 0.81368268 0.80640466
|
|
0.81368268 0.80931587 0.81395349 0.81540698]
|
|
|
|
mean value: 0.8122446345756745
|
|
|
|
key: test_roc_auc
|
|
value: [0.77144566 0.80348599 0.82997266 0.81715653 0.80382775 0.82997266
|
|
0.80399863 0.82347915 0.78947368 0.82236842]
|
|
|
|
mean value: 0.8095181134654819
|
|
|
|
key: train_roc_auc
|
|
value: [0.8189127 0.81527157 0.8094555 0.81236459 0.81672506 0.81090582
|
|
0.81018436 0.81308817 0.8190407 0.81540698]
|
|
|
|
mean value: 0.8141355446667344
|
|
|
|
key: test_jcc
|
|
value: [0.63541667 0.65116279 0.70454545 0.69565217 0.67741935 0.71428571
|
|
0.67032967 0.7032967 0.65957447 0.68965517]
|
|
|
|
mean value: 0.6801338169072536
|
|
|
|
key: train_jcc
|
|
value: [0.69221261 0.68872549 0.6797066 0.68382353 0.6892725 0.68058968
|
|
0.68170732 0.68388684 0.69221261 0.68834356]
|
|
|
|
mean value: 0.6860480735287339
|
|
|
|
MCC on Blind test: 0.44
|
|
|
|
Accuracy on Blind test: 0.78
|
|
|
|
Model_name: Passive Aggresive
|
|
Model func: PassiveAggressiveClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'mcsm_ppi2_affinity',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=169)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
PassiveAggressiveClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.03034759 0.03951335 0.04411817 0.04173803 0.04469919 0.0521946
|
|
0.03450179 0.03709054 0.0291028 0.04602385]
|
|
|
|
mean value: 0.039932990074157716
|
|
|
|
key: score_time
|
|
value: [0.0125916 0.01269341 0.01273084 0.01266551 0.01275921 0.01306891
|
|
0.01623774 0.01548362 0.01263404 0.01276422]
|
|
|
|
mean value: 0.013362908363342285
|
|
|
|
key: test_mcc
|
|
value: [0.7170263 0.5587377 0.79506337 0.56786741 0.76875613 0.71347968
|
|
0.7117436 0.76499745 0.64605828 0.750585 ]
|
|
|
|
mean value: 0.6994314913250289
|
|
|
|
key: train_mcc
|
|
value: [0.77927997 0.5469202 0.80317154 0.59528597 0.82345158 0.80876422
|
|
0.70266394 0.79219176 0.81323055 0.79093784]
|
|
|
|
mean value: 0.7455897566401816
|
|
|
|
key: test_accuracy
|
|
value: [0.84313725 0.74509804 0.89542484 0.76470588 0.88235294 0.85620915
|
|
0.83660131 0.88235294 0.81578947 0.875 ]
|
|
|
|
mean value: 0.8396671826625387
|
|
|
|
key: train_accuracy
|
|
value: [0.88218182 0.74618182 0.90109091 0.77236364 0.90981818 0.904
|
|
0.83418182 0.896 0.90479651 0.89462209]
|
|
|
|
mean value: 0.8645236786469345
|
|
|
|
key: test_fscore
|
|
value: [0.86206897 0.66086957 0.88888889 0.70967742 0.88888889 0.85333333
|
|
0.8603352 0.88157895 0.83333333 0.87248322]
|
|
|
|
mean value: 0.8311457758909573
|
|
|
|
key: train_fscore
|
|
value: [0.89271523 0.67595172 0.89865872 0.71519563 0.91388889 0.90178571
|
|
0.85624212 0.89704824 0.90909091 0.89105935]
|
|
|
|
mean value: 0.8551636521059554
|
|
|
|
key: test_precision
|
|
value: [0.76530612 0.97435897 0.94117647 0.91666667 0.84705882 0.87671233
|
|
0.75490196 0.89333333 0.76086957 0.89041096]
|
|
|
|
mean value: 0.8620795204598539
|
|
|
|
key: train_precision
|
|
value: [0.81995134 0.93573265 0.92201835 0.95620438 0.87383798 0.92237443
|
|
0.75528365 0.88746439 0.86985392 0.9222395 ]
|
|
|
|
mean value: 0.8864960580789983
|
|
|
|
key: test_recall
|
|
value: [0.98684211 0.5 0.84210526 0.57894737 0.93506494 0.83116883
|
|
1. 0.87012987 0.92105263 0.85526316]
|
|
|
|
mean value: 0.8320574162679426
|
|
|
|
key: train_recall
|
|
value: [0.97965116 0.52906977 0.87645349 0.57122093 0.95778748 0.88209607
|
|
0.98835517 0.90684134 0.95203488 0.8619186 ]
|
|
|
|
mean value: 0.8505428895433466
|
|
|
|
key: test_roc_auc
|
|
value: [0.8440704 0.74350649 0.89507861 0.76349966 0.88200615 0.85637389
|
|
0.83552632 0.88243336 0.81578947 0.875 ]
|
|
|
|
mean value: 0.8393284347231715
|
|
|
|
key: train_roc_auc
|
|
value: [0.88211088 0.74633983 0.90110884 0.77251003 0.90985304 0.90398408
|
|
0.83429386 0.89600788 0.90479651 0.89462209]
|
|
|
|
mean value: 0.8645627052232491
|
|
|
|
key: test_jcc
|
|
value: [0.75757576 0.49350649 0.8 0.55 0.8 0.74418605
|
|
0.75490196 0.78823529 0.71428571 0.77380952]
|
|
|
|
mean value: 0.7176500790591078
|
|
|
|
key: train_jcc
|
|
value: [0.8062201 0.51051893 0.81596752 0.55665722 0.84143223 0.82113821
|
|
0.74862183 0.81331593 0.83333333 0.80352304]
|
|
|
|
mean value: 0.755072833936398
|
|
|
|
MCC on Blind test: 0.56
|
|
|
|
Accuracy on Blind test: 0.73
|
|
|
|
Model_name: Stochastic GDescent
|
|
Model func: SGDClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'mcsm_ppi2_affinity',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=169)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SGDClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.03850412 0.05590057 0.05355477 0.05945849 0.04342318 0.04294348
|
|
0.04142976 0.05932379 0.04073524 0.04787827]
|
|
|
|
mean value: 0.04831516742706299
|
|
|
|
key: score_time
|
|
value: [0.01276469 0.01280403 0.01285434 0.01274776 0.0127275 0.01270294
|
|
0.01266289 0.01271987 0.01262021 0.01273441]
|
|
|
|
mean value: 0.012733864784240722
|
|
|
|
key: test_mcc
|
|
value: [0.56751545 0.5587377 0.7910755 0.83345743 0.76469583 0.7222367
|
|
0.82137062 0.76694032 0.69823717 0.68849802]
|
|
|
|
mean value: 0.721276473857401
|
|
|
|
key: train_mcc
|
|
value: [0.72765791 0.57409835 0.84747197 0.78506565 0.82982653 0.78075296
|
|
0.81013722 0.80330096 0.83469587 0.73756632]
|
|
|
|
mean value: 0.7730573740726931
|
|
|
|
key: test_accuracy
|
|
value: [0.77777778 0.74509804 0.89542484 0.91503268 0.88235294 0.84313725
|
|
0.90849673 0.88235294 0.83552632 0.83552632]
|
|
|
|
mean value: 0.8520725834193327
|
|
|
|
key: train_accuracy
|
|
value: [0.85672727 0.75636364 0.92363636 0.89018182 0.91490909 0.88218182
|
|
0.90109091 0.89745455 0.91351744 0.86264535]
|
|
|
|
mean value: 0.8798708245243129
|
|
|
|
key: test_fscore
|
|
value: [0.75 0.66086957 0.89333333 0.91034483 0.88311688 0.86516854
|
|
0.91358025 0.8875 0.85549133 0.81481481]
|
|
|
|
mean value: 0.8434219539787821
|
|
|
|
key: train_fscore
|
|
value: [0.84125705 0.68603561 0.92451474 0.88393543 0.91503268 0.89299868
|
|
0.90748299 0.90427699 0.91899251 0.84892086]
|
|
|
|
mean value: 0.8723447549413826
|
|
|
|
key: test_precision
|
|
value: [0.85 0.97435897 0.90540541 0.95652174 0.88311688 0.76237624
|
|
0.87058824 0.85542169 0.7628866 0.93220339]
|
|
|
|
mean value: 0.8752879149445218
|
|
|
|
key: train_precision
|
|
value: [0.94394213 0.96569921 0.91465149 0.93800979 0.91304348 0.81741233
|
|
0.85185185 0.84732824 0.86427657 0.94316163]
|
|
|
|
mean value: 0.8999376734514775
|
|
|
|
key: test_recall
|
|
value: [0.67105263 0.5 0.88157895 0.86842105 0.88311688 1.
|
|
0.96103896 0.92207792 0.97368421 0.72368421]
|
|
|
|
mean value: 0.8384654818865345
|
|
|
|
key: train_recall
|
|
value: [0.75872093 0.53197674 0.93459302 0.83575581 0.91703057 0.98398836
|
|
0.97088792 0.96943231 0.98110465 0.77180233]
|
|
|
|
mean value: 0.8655292644121729
|
|
|
|
key: test_roc_auc
|
|
value: [0.77708476 0.74350649 0.89533493 0.91473001 0.88234792 0.84210526
|
|
0.90815106 0.88209159 0.83552632 0.83552632]
|
|
|
|
mean value: 0.8516404647983595
|
|
|
|
key: train_roc_auc
|
|
value: [0.8567986 0.75652695 0.92362839 0.89022143 0.91491063 0.88225581
|
|
0.90114163 0.89750685 0.91351744 0.86264535]
|
|
|
|
mean value: 0.8799153083849565
|
|
|
|
key: test_jcc
|
|
value: [0.6 0.49350649 0.80722892 0.83544304 0.79069767 0.76237624
|
|
0.84090909 0.79775281 0.74747475 0.6875 ]
|
|
|
|
mean value: 0.7362889006558797
|
|
|
|
key: train_jcc
|
|
value: [0.72600834 0.52211127 0.85962567 0.79201102 0.84337349 0.80668258
|
|
0.83063512 0.82527881 0.85012594 0.7375 ]
|
|
|
|
mean value: 0.7793352247112477
|
|
|
|
MCC on Blind test: 0.48
|
|
|
|
Accuracy on Blind test: 0.8
|
|
|
|
Model_name: AdaBoost Classifier
|
|
Model func: AdaBoostClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline:/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'mcsm_ppi2_affinity',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=169)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', AdaBoostClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.49280715 0.48593616 0.49191427 0.50021434 0.4824698 0.48278832
|
|
0.48085332 0.50741911 0.50439215 0.47906876]
|
|
|
|
mean value: 0.49078633785247805
|
|
|
|
key: score_time
|
|
value: [0.01859808 0.01796675 0.01763153 0.01844335 0.01700687 0.01760054
|
|
0.0184195 0.01879811 0.01850104 0.01665831]
|
|
|
|
mean value: 0.0179624080657959
|
|
|
|
key: test_mcc
|
|
value: [0.81960182 0.86959495 0.81698565 0.87042236 0.8580978 0.79506337
|
|
0.86927546 0.92280176 0.78947368 0.82901914]
|
|
|
|
mean value: 0.8440335996568847
|
|
|
|
key: train_mcc
|
|
value: [0.91730792 0.91582498 0.92151623 0.90715317 0.91737285 0.89971436
|
|
0.89688899 0.91569903 0.91601123 0.9098991 ]
|
|
|
|
mean value: 0.911738786114328
|
|
|
|
key: test_accuracy
|
|
value: [0.90849673 0.93464052 0.90849673 0.93464052 0.92810458 0.89542484
|
|
0.93464052 0.96078431 0.89473684 0.91447368]
|
|
|
|
mean value: 0.9214439284485725
|
|
|
|
key: train_accuracy
|
|
value: [0.95854545 0.95781818 0.96072727 0.95345455 0.95854545 0.94981818
|
|
0.94836364 0.95781818 0.95784884 0.95494186]
|
|
|
|
mean value: 0.9557881606765328
|
|
|
|
key: test_fscore
|
|
value: [0.91139241 0.93506494 0.90789474 0.93243243 0.9261745 0.90123457
|
|
0.93506494 0.96202532 0.89473684 0.91390728]
|
|
|
|
mean value: 0.92199279523424
|
|
|
|
key: train_fscore
|
|
value: [0.95902229 0.95827338 0.96098266 0.95402299 0.95902229 0.95010846
|
|
0.94881038 0.95803184 0.95839311 0.95507246]
|
|
|
|
mean value: 0.9561739858027037
|
|
|
|
key: test_precision
|
|
value: [0.87804878 0.92307692 0.90789474 0.95833333 0.95833333 0.85882353
|
|
0.93506494 0.9382716 0.89473684 0.92 ]
|
|
|
|
mean value: 0.9172584018593735
|
|
|
|
key: train_precision
|
|
value: [0.9487909 0.94871795 0.95545977 0.94318182 0.94744318 0.94396552
|
|
0.94 0.95251799 0.94617564 0.95231214]
|
|
|
|
mean value: 0.947856489396719
|
|
|
|
key: test_recall
|
|
value: [0.94736842 0.94736842 0.90789474 0.90789474 0.8961039 0.94805195
|
|
0.93506494 0.98701299 0.89473684 0.90789474]
|
|
|
|
mean value: 0.9279391660970608
|
|
|
|
key: train_recall
|
|
value: [0.96947674 0.96802326 0.96656977 0.96511628 0.97088792 0.95633188
|
|
0.95778748 0.9636099 0.97093023 0.95784884]
|
|
|
|
mean value: 0.9646582292407163
|
|
|
|
key: test_roc_auc
|
|
value: [0.90874915 0.93472317 0.90849282 0.93446685 0.92831511 0.89507861
|
|
0.93463773 0.96061176 0.89473684 0.91447368]
|
|
|
|
mean value: 0.9214285714285714
|
|
|
|
key: train_roc_auc
|
|
value: [0.9585375 0.95781075 0.96072302 0.95344606 0.95855442 0.94982292
|
|
0.94837049 0.95782239 0.95784884 0.95494186]
|
|
|
|
mean value: 0.955787824548932
|
|
|
|
key: test_jcc
|
|
value: [0.8372093 0.87804878 0.8313253 0.87341772 0.8625 0.82022472
|
|
0.87804878 0.92682927 0.80952381 0.84146341]
|
|
|
|
mean value: 0.855859109757676
|
|
|
|
key: train_jcc
|
|
value: [0.92127072 0.9198895 0.92489569 0.91208791 0.92127072 0.90495868
|
|
0.90260631 0.91944444 0.92011019 0.91400832]
|
|
|
|
mean value: 0.9160542486527511
|
|
|
|
MCC on Blind test: 0.76
|
|
|
|
Accuracy on Blind test: 0.9
|
|
|
|
Model_name: Bagging Classifier
|
|
Model func: BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'mcsm_ppi2_affinity',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=169)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
BaggingClassifier(n_jobs=10, oob_score=True,
|
|
random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.26527977 0.26414466 0.26862955 0.27799296 0.28035855 0.17143774
|
|
0.17958832 0.27080297 0.26924849 0.29021096]
|
|
|
|
mean value: 0.2537693977355957
|
|
|
|
key: score_time
|
|
value: [0.02526283 0.03027081 0.03136849 0.03633428 0.03711796 0.02036166
|
|
0.0312829 0.03433752 0.02898359 0.04428291]
|
|
|
|
mean value: 0.03196029663085938
|
|
|
|
key: test_mcc
|
|
value: [0.8319081 0.83011452 0.77831037 0.92156528 0.88305705 0.82137062
|
|
0.83065388 0.90916914 0.90986594 0.8553372 ]
|
|
|
|
mean value: 0.8571352090194626
|
|
|
|
key: train_mcc
|
|
value: [0.99418603 0.99709513 0.99272831 0.99709091 0.99419867 0.99127272
|
|
0.98981921 0.99272831 0.99129582 0.98982663]
|
|
|
|
mean value: 0.9930241750645628
|
|
|
|
key: test_accuracy
|
|
value: [0.91503268 0.91503268 0.88888889 0.96078431 0.94117647 0.90849673
|
|
0.91503268 0.95424837 0.95394737 0.92763158]
|
|
|
|
mean value: 0.9280271757825937
|
|
|
|
key: train_accuracy
|
|
value: [0.99709091 0.99854545 0.99636364 0.99854545 0.99709091 0.99563636
|
|
0.99490909 0.99636364 0.99563953 0.99491279]
|
|
|
|
mean value: 0.996509778012685
|
|
|
|
key: test_fscore
|
|
value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
[0.91719745 0.91390728 0.88590604 0.96052632 0.94039735 0.91358025
|
|
0.91719745 0.95541401 0.95541401 0.92810458]
|
|
|
|
mean value: 0.9287644743832804
|
|
|
|
key: train_fscore
|
|
value: [0.99708879 0.9985444 0.99636892 0.99854651 0.99709724 0.99563319
|
|
0.99490168 0.99635834 0.99562682 0.99490909]
|
|
|
|
mean value: 0.9965074975124733
|
|
|
|
key: test_precision
|
|
value: [0.88888889 0.92 0.90410959 0.96052632 0.95945946 0.87058824
|
|
0.9 0.9375 0.92592593 0.92207792]
|
|
|
|
mean value: 0.9189076336476883
|
|
|
|
key: train_precision
|
|
value: [0.99854227 1. 0.99564586 0.99854651 0.99421129 0.99563319
|
|
0.99562682 0.99708455 0.99853801 0.99563319]
|
|
|
|
mean value: 0.9969461694743348
|
|
|
|
key: test_recall
|
|
value: [0.94736842 0.90789474 0.86842105 0.96052632 0.92207792 0.96103896
|
|
0.93506494 0.97402597 0.98684211 0.93421053]
|
|
|
|
mean value: 0.9397470950102529
|
|
|
|
key: train_recall
|
|
value: [0.99563953 0.99709302 0.99709302 0.99854651 1. 0.99563319
|
|
0.99417758 0.99563319 0.99273256 0.99418605]
|
|
|
|
mean value: 0.9960734656917505
|
|
|
|
key: test_roc_auc
|
|
value: [0.91524265 0.91498633 0.88875598 0.96078264 0.94130212 0.90815106
|
|
0.91490089 0.95411825 0.95394737 0.92763158]
|
|
|
|
mean value: 0.9279818865345181
|
|
|
|
key: train_roc_auc
|
|
value: [0.99709197 0.99854651 0.99636311 0.99854545 0.99709302 0.99563636
|
|
0.99490856 0.99636311 0.99563953 0.99491279]
|
|
|
|
mean value: 0.996510041129278
|
|
|
|
key: test_jcc
|
|
value: [0.84705882 0.84146341 0.79518072 0.92405063 0.8875 0.84090909
|
|
0.84705882 0.91463415 0.91463415 0.86585366]
|
|
|
|
mean value: 0.8678343459624531
|
|
|
|
key: train_jcc
|
|
value: [0.99419448 0.99709302 0.99276411 0.99709724 0.99421129 0.99130435
|
|
0.98985507 0.99274311 0.99129173 0.98986975]
|
|
|
|
mean value: 0.993042415573158
|
|
|
|
MCC on Blind test: 0.65
|
|
|
|
Accuracy on Blind test: 0.86
|
|
|
|
Model_name: Gaussian Process
|
|
Model func: GaussianProcessClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'mcsm_ppi2_affinity',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=169)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianProcessClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.19627285 1.0048027 1.13782144 0.96918082 1.06693482 0.97411537
|
|
1.04286337 1.09282875 1.01316643 0.98342586]
|
|
|
|
mean value: 1.0481412410736084
|
|
|
|
key: score_time
|
|
value: [0.06513882 0.0677371 0.0651989 0.03362179 0.03368759 0.06193662
|
|
0.06782055 0.06188536 0.03380275 0.06560493]
|
|
|
|
mean value: 0.055643439292907715
|
|
|
|
key: test_mcc
|
|
value: [0.7286962 0.72551702 0.76717745 0.74515756 0.76566357 0.72105644
|
|
0.783044 0.79185327 0.69021144 0.77962978]
|
|
|
|
mean value: 0.7498006720758227
|
|
|
|
key: train_mcc
|
|
value: [0.93526512 0.92934003 0.92653086 0.92664067 0.93376066 0.93215975
|
|
0.9292491 0.92643279 0.93370781 0.93079906]
|
|
|
|
mean value: 0.930388585410863
|
|
|
|
key: test_accuracy
|
|
value: [0.8627451 0.8627451 0.88235294 0.86928105 0.88235294 0.85620915
|
|
0.88888889 0.89542484 0.84210526 0.88815789]
|
|
|
|
mean value: 0.8730263157894737
|
|
|
|
key: train_accuracy
|
|
value: [0.96727273 0.96436364 0.96290909 0.96290909 0.96654545 0.96581818
|
|
0.96436364 0.96290909 0.96656977 0.96511628]
|
|
|
|
mean value: 0.9648776955602537
|
|
|
|
key: test_fscore
|
|
value: [0.86792453 0.86092715 0.88607595 0.87654321 0.88607595 0.86746988
|
|
0.89570552 0.89873418 0.85185185 0.89308176]
|
|
|
|
mean value: 0.8784389980294284
|
|
|
|
key: train_fscore
|
|
value: [0.96792587 0.96502498 0.96364932 0.96370107 0.96714286 0.96635648
|
|
0.96492484 0.96354539 0.96714286 0.96571429]
|
|
|
|
mean value: 0.9655127952451877
|
|
|
|
key: test_precision
|
|
value: [0.8313253 0.86666667 0.85365854 0.8255814 0.86419753 0.80898876
|
|
0.84883721 0.87654321 0.80232558 0.85542169]
|
|
|
|
mean value: 0.8433545882036035
|
|
|
|
key: train_precision
|
|
value: [0.94965035 0.94810659 0.94545455 0.94421199 0.94950912 0.95070423
|
|
0.94929577 0.94662921 0.9508427 0.9494382 ]
|
|
|
|
mean value: 0.9483842710160539
|
|
|
|
key: test_recall
|
|
value: [0.90789474 0.85526316 0.92105263 0.93421053 0.90909091 0.93506494
|
|
0.94805195 0.92207792 0.90789474 0.93421053]
|
|
|
|
mean value: 0.9174812030075188
|
|
|
|
key: train_recall
|
|
value: [0.9869186 0.98255814 0.98255814 0.98401163 0.98544396 0.98253275
|
|
0.98107715 0.98107715 0.98401163 0.98255814]
|
|
|
|
mean value: 0.9832747283436579
|
|
|
|
key: test_roc_auc
|
|
value: [0.86303828 0.86269651 0.88260424 0.86970267 0.88217703 0.85569036
|
|
0.88849966 0.89524949 0.84210526 0.88815789]
|
|
|
|
mean value: 0.8729921394395078
|
|
|
|
key: train_roc_auc
|
|
value: [0.96725843 0.96435039 0.96289479 0.96289373 0.96655919 0.96583033
|
|
0.96437578 0.96292229 0.96656977 0.96511628]
|
|
|
|
mean value: 0.9648770987779697
|
|
|
|
key: test_jcc
|
|
value: [0.76666667 0.75581395 0.79545455 0.78021978 0.79545455 0.76595745
|
|
0.81111111 0.81609195 0.74193548 0.80681818]
|
|
|
|
mean value: 0.7835523668915669
|
|
|
|
key: train_jcc
|
|
value: [0.9378453 0.93241379 0.92984869 0.92994505 0.93637621 0.93490305
|
|
0.93222683 0.92965517 0.93637621 0.93370166]
|
|
|
|
mean value: 0.9333291975251682
|
|
|
|
MCC on Blind test: 0.57
|
|
|
|
Accuracy on Blind test: 0.81
|
|
|
|
Model_name: Gradient Boosting
|
|
Model func: GradientBoostingClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'mcsm_ppi2_affinity',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=169)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GradientBoostingClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [2.22990346 2.19403863 2.24637413 2.20530295 2.21160173 2.19768667
|
|
2.20086956 2.20155454 2.21410513 2.19921803]
|
|
|
|
mean value: 2.210065484046936
|
|
|
|
key: score_time
|
|
value: [0.01075768 0.01002407 0.00995326 0.00992203 0.01025414 0.00979948
|
|
0.01001334 0.01008415 0.010149 0.01006365]
|
|
|
|
mean value: 0.010102081298828124
|
|
|
|
key: test_mcc
|
|
value: [0.82420246 0.86927546 0.80399863 0.92156528 0.89542037 0.87398511
|
|
0.92186711 0.90916914 0.88165527 0.86962636]
|
|
|
|
mean value: 0.8770765186560748
|
|
|
|
key: train_mcc
|
|
value: [0.97967353 0.97253713 0.97097457 0.97820756 0.97396687 0.97683084
|
|
0.97679359 0.97532345 0.9797174 0.97246693]
|
|
|
|
mean value: 0.9756491883354808
|
|
|
|
key: test_accuracy
|
|
value: [0.90849673 0.93464052 0.90196078 0.96078431 0.94771242 0.93464052
|
|
0.96078431 0.95424837 0.94078947 0.93421053]
|
|
|
|
mean value: 0.9378267973856209
|
|
|
|
key: train_accuracy
|
|
value: [0.98981818 0.98618182 0.98545455 0.98909091 0.98690909 0.98836364
|
|
0.98836364 0.98763636 0.98982558 0.98619186]
|
|
|
|
mean value: 0.9877835623678647
|
|
|
|
key: test_fscore
|
|
value: [0.91358025 0.93421053 0.90196078 0.96052632 0.94805195 0.9382716
|
|
0.96153846 0.95541401 0.94117647 0.93589744]
|
|
|
|
mean value: 0.9390627807085775
|
|
|
|
key: train_fscore
|
|
value: [0.98986975 0.98632109 0.98554913 0.98913831 0.98701299 0.98843931
|
|
0.98842258 0.98769008 0.98988439 0.98628159]
|
|
|
|
mean value: 0.9878609217327718
|
|
|
|
key: test_precision
|
|
value: [0.86046512 0.93421053 0.8961039 0.96052632 0.94805195 0.89411765
|
|
0.94936709 0.9375 0.93506494 0.9125 ]
|
|
|
|
mean value: 0.922790747327153
|
|
|
|
key: train_precision
|
|
value: [0.98559078 0.97717546 0.97988506 0.98556999 0.97854077 0.98134864
|
|
0.98273381 0.98270893 0.9841954 0.97991392]
|
|
|
|
mean value: 0.9817662760062895
|
|
|
|
key: test_recall
|
|
value: [0.97368421 0.93421053 0.90789474 0.96052632 0.94805195 0.98701299
|
|
0.97402597 0.97402597 0.94736842 0.96052632]
|
|
|
|
mean value: 0.9567327409432672
|
|
|
|
key: train_recall
|
|
value: [0.99418605 0.99563953 0.99127907 0.99273256 0.99563319 0.99563319
|
|
0.99417758 0.99272198 0.99563953 0.99273256]
|
|
|
|
mean value: 0.994037524119021
|
|
|
|
key: test_roc_auc
|
|
value: [0.90892003 0.93463773 0.90199932 0.96078264 0.94771018 0.93429597
|
|
0.9606972 0.95411825 0.94078947 0.93421053]
|
|
|
|
mean value: 0.9378161312371839
|
|
|
|
key: train_roc_auc
|
|
value: [0.989815 0.98617493 0.98545031 0.98908826 0.98691543 0.98836892
|
|
0.98836786 0.98764006 0.98982558 0.98619186]
|
|
|
|
mean value: 0.9877838216377238
|
|
|
|
key: test_jcc
|
|
value: [0.84090909 0.87654321 0.82142857 0.92405063 0.90123457 0.88372093
|
|
0.92592593 0.91463415 0.88888889 0.87951807]
|
|
|
|
mean value: 0.8856854036704825
|
|
|
|
key: train_jcc
|
|
value: [0.97994269 0.97301136 0.97150997 0.97851003 0.97435897 0.97714286
|
|
0.97711016 0.97567954 0.97997139 0.97293447]
|
|
|
|
mean value: 0.9760171448913202
|
|
|
|
MCC on Blind test: 0.63
|
|
|
|
Accuracy on Blind test: 0.85
|
|
|
|
Model_name: QDA
|
|
Model func: QuadraticDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'mcsm_ppi2_affinity',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=169)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', QuadraticDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.04739761 0.05204701 0.04915428 0.04749823 0.05376077 0.04758549
|
|
0.04765797 0.04763842 0.0475862 0.04792452]
|
|
|
|
mean value: 0.04882504940032959
|
|
|
|
key: score_time
|
|
value: [0.01422596 0.01367545 0.01373053 0.01375723 0.0136137 0.01362586
|
|
0.01365042 0.01357007 0.0135498 0.01358485]
|
|
|
|
mean value: 0.013698387145996093
|
|
|
|
key: test_mcc
|
|
value: [0.17927998 0.24837132 0.3152957 0.27651176 0.30126523 0.25163936
|
|
0.26617701 0.20335531 0.03138824 0.22852652]
|
|
|
|
mean value: 0.23018104418317292
|
|
|
|
key: train_mcc
|
|
value: [0.27722567 0.25364344 0.2568801 0.27262939 0.29027953 0.25811124
|
|
0.25001123 0.25650673 0.27232483 0.27078208]
|
|
|
|
mean value: 0.26583942297502267
|
|
|
|
key: test_accuracy
|
|
value: [0.54901961 0.55555556 0.58823529 0.56862745 0.59477124 0.5620915
|
|
0.56862745 0.54248366 0.50657895 0.55921053]
|
|
|
|
mean value: 0.5595201238390093
|
|
|
|
key: train_accuracy
|
|
value: [0.57163636 0.56072727 0.56218182 0.56945455 0.57745455 0.56218182
|
|
0.55854545 0.56145455 0.5690407 0.56831395]
|
|
|
|
mean value: 0.5660991014799155
|
|
|
|
key: test_fscore
|
|
value: [0.67906977 0.69090909 0.70697674 0.69724771 0.71028037 0.69683258
|
|
0.7 0.6875 0.66063348 0.69124424]
|
|
|
|
mean value: 0.6920693985770544
|
|
|
|
key: train_fscore
|
|
value: [0.70025445 0.69494949 0.69565217 0.69918699 0.7028133 0.69534413
|
|
0.6935891 0.69499241 0.69883189 0.69847716]
|
|
|
|
mean value: 0.6974091103331622
|
|
|
|
key: test_precision
|
|
value: [0.52517986 0.52777778 0.54676259 0.53521127 0.55474453 0.53472222
|
|
0.53846154 0.52380952 0.50344828 0.53191489]
|
|
|
|
mean value: 0.5322032470946397
|
|
|
|
key: train_precision
|
|
value: [0.53876273 0.53250774 0.53333333 0.5375 0.54179811 0.5329713
|
|
0.5309119 0.53255814 0.53708041 0.53666147]
|
|
|
|
mean value: 0.5354085114250265
|
|
|
|
key: test_recall
|
|
value: [0.96052632 1. 1. 1. 0.98701299 1.
|
|
1. 1. 0.96052632 0.98684211]
|
|
|
|
mean value: 0.9894907723855092
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.55169173 0.55844156 0.59090909 0.57142857 0.5921907 0.55921053
|
|
0.56578947 0.53947368 0.50657895 0.55921053]
|
|
|
|
mean value: 0.5594924812030075
|
|
|
|
key: train_roc_auc
|
|
value: [0.5713246 0.56040757 0.56186317 0.56914119 0.57776163 0.5625
|
|
0.55886628 0.56177326 0.5690407 0.56831395]
|
|
|
|
mean value: 0.5660992349615789
|
|
|
|
key: test_jcc
|
|
value: [0.51408451 0.52777778 0.54676259 0.53521127 0.55072464 0.53472222
|
|
0.53846154 0.52380952 0.49324324 0.52816901]
|
|
|
|
mean value: 0.5292966321855916
|
|
|
|
key: train_jcc
|
|
value: [0.53876273 0.53250774 0.53333333 0.5375 0.54179811 0.5329713
|
|
0.5309119 0.53255814 0.53708041 0.53666147]
|
|
|
|
mean value: 0.5354085114250265
|
|
|
|
MCC on Blind test: 0.06
|
|
|
|
Accuracy on Blind test: 0.34
|
|
|
|
Model_name: Ridge Classifier
|
|
Model func: RidgeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'mcsm_ppi2_affinity',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=169)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.03019214 0.03430319 0.03930664 0.04300427 0.03467107 0.04208589
|
|
0.0427537 0.04216647 0.04220176 0.04299521]
|
|
|
|
mean value: 0.03936803340911865
|
|
|
|
key: score_time
|
|
value: [0.02484775 0.02288175 0.01969838 0.0197773 0.01973033 0.0196712
|
|
0.01979804 0.01973248 0.01964068 0.01968527]
|
|
|
|
mean value: 0.020546317100524902
|
|
|
|
key: test_mcc
|
|
value: [0.74016183 0.84344558 0.76582319 0.78117468 0.783044 0.82137062
|
|
0.84767786 0.81941879 0.71549794 0.77800131]
|
|
|
|
mean value: 0.789561579965255
|
|
|
|
key: train_mcc
|
|
value: [0.81696831 0.81408326 0.82308041 0.81909233 0.81514715 0.81885093
|
|
0.81939934 0.81566178 0.81188389 0.81577901]
|
|
|
|
mean value: 0.8169946408029654
|
|
|
|
key: test_accuracy
|
|
value: [0.8627451 0.92156863 0.88235294 0.88888889 0.88888889 0.90849673
|
|
0.92156863 0.90849673 0.84868421 0.88815789]
|
|
|
|
mean value: 0.891984864121087
|
|
|
|
key: train_accuracy
|
|
value: [0.90618182 0.904 0.90909091 0.90690909 0.90472727 0.90690909
|
|
0.90690909 0.90545455 0.90406977 0.90552326]
|
|
|
|
mean value: 0.9059774841437632
|
|
|
|
key: test_fscore
|
|
value: [0.8742515 0.92207792 0.88461538 0.89308176 0.89570552 0.91358025
|
|
0.92592593 0.9125 0.86390533 0.89171975]
|
|
|
|
mean value: 0.8977363329684199
|
|
|
|
key: train_fscore
|
|
value: /home/tanu/git/LSHTM_analysis/scripts/ml/./rpob_cd_sl.py:136: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
smnc_CT.sort_values(by = ['test_mcc'], ascending = False, inplace = True)
|
|
/home/tanu/git/LSHTM_analysis/scripts/ml/./rpob_cd_sl.py:139: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
smnc_BT.sort_values(by = ['bts_mcc'], ascending = False, inplace = True)
|
|
[0.91097308 0.90958904 0.91385252 0.91196699 0.90996564 0.91172414
|
|
0.91196699 0.91022099 0.90846047 0.91034483]
|
|
|
|
mean value: 0.910906468402776
|
|
|
|
key: test_precision
|
|
value: [0.8021978 0.91025641 0.8625 0.85542169 0.84883721 0.87058824
|
|
0.88235294 0.87951807 0.78494624 0.86419753]
|
|
|
|
mean value: 0.8560816124686608
|
|
|
|
key: train_precision
|
|
value: [0.86727989 0.86010363 0.8689384 0.86553525 0.86197917 0.86631717
|
|
0.86440678 0.86596583 0.86870027 0.86614173]
|
|
|
|
mean value: 0.8655368118269422
|
|
|
|
key: test_recall
|
|
value: [0.96052632 0.93421053 0.90789474 0.93421053 0.94805195 0.96103896
|
|
0.97402597 0.94805195 0.96052632 0.92105263]
|
|
|
|
mean value: 0.944958988380041
|
|
|
|
key: train_recall
|
|
value: [0.95930233 0.96511628 0.96366279 0.96366279 0.9636099 0.96215429
|
|
0.9650655 0.95924309 0.95203488 0.95930233]
|
|
|
|
mean value: 0.9613154175552622
|
|
|
|
key: test_roc_auc
|
|
value: [0.86338004 0.92165072 0.8825188 0.88918319 0.88849966 0.90815106
|
|
0.92122351 0.9082365 0.84868421 0.88815789]
|
|
|
|
mean value: 0.8919685577580314
|
|
|
|
key: train_roc_auc
|
|
value: [0.90614316 0.90395552 0.90905119 0.90686779 0.90477007 0.90694924
|
|
0.90695136 0.90549364 0.90406977 0.90552326]
|
|
|
|
mean value: 0.9059774973765276
|
|
|
|
key: test_jcc
|
|
value: [0.77659574 0.85542169 0.79310345 0.80681818 0.81111111 0.84090909
|
|
0.86206897 0.83908046 0.76041667 0.8045977 ]
|
|
|
|
mean value: 0.8150123056645533
|
|
|
|
key: train_jcc
|
|
value: [0.8365019 0.83417085 0.84137056 0.83817952 0.83480454 0.83776933
|
|
0.83817952 0.83523447 0.83227446 0.83544304]
|
|
|
|
mean value: 0.8363928192931785
|
|
|
|
MCC on Blind test: 0.61
|
|
|
|
Accuracy on Blind test: 0.83
|
|
|
|
Model_name: Ridge ClassifierCV
|
|
Model func: RidgeClassifierCV(cv=10)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'mcsm_ppi2_affinity',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=169)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifierCV(cv=10))])
|
|
|
|
key: fit_time
|
|
value: [0.51442647 0.42798162 0.41593194 0.41041636 0.42726326 0.52341962
|
|
0.41059017 0.49641275 0.61367702 0.5853231 ]
|
|
|
|
mean value: 0.4825442314147949
|
|
|
|
key: score_time
|
|
value: [0.01987147 0.01970005 0.01983261 0.01969528 0.01977801 0.01981473
|
|
0.02017355 0.01977921 0.01974201 0.0197382 ]
|
|
|
|
mean value: 0.019812512397766113
|
|
|
|
key: test_mcc
|
|
value: [0.74016183 0.80396515 0.76582319 0.8181667 0.79506337 0.82137062
|
|
0.81941879 0.82137062 0.72681557 0.81692173]
|
|
|
|
mean value: 0.7929077560474234
|
|
|
|
key: train_mcc
|
|
value: [0.81696831 0.82568494 0.83152578 0.83061833 0.82614433 0.8313374
|
|
0.8333352 0.81810006 0.82853078 0.83344344]
|
|
|
|
mean value: 0.8275688557808094
|
|
|
|
key: test_accuracy
|
|
value: [0.8627451 0.90196078 0.88235294 0.90849673 0.89542484 0.90849673
|
|
0.90849673 0.90849673 0.85526316 0.90789474]
|
|
|
|
mean value: 0.8939628482972136
|
|
|
|
key: train_accuracy
|
|
value: [0.90618182 0.91127273 0.91418182 0.91345455 0.91127273 0.91418182
|
|
0.91490909 0.90763636 0.9127907 0.91497093]
|
|
|
|
mean value: 0.9120852536997885
|
|
|
|
key: test_fscore
|
|
value: [0.8742515 0.90066225 0.88461538 0.91025641 0.90123457 0.91358025
|
|
0.9125 0.91358025 0.86904762 0.91025641]
|
|
|
|
mean value: 0.8989984634565836
|
|
|
|
key: train_fscore
|
|
value: [0.91097308 0.91504178 0.9178273 0.91741846 0.91515994 0.91759777
|
|
0.91858038 0.91125087 0.91631799 0.91869354]
|
|
|
|
mean value: 0.9158861112907871
|
|
|
|
key: test_precision
|
|
value: [0.8021978 0.90666667 0.8625 0.8875 0.85882353 0.87058824
|
|
0.87951807 0.87058824 0.79347826 0.8875 ]
|
|
|
|
mean value: 0.861936080202319
|
|
|
|
key: train_precision
|
|
value: [0.86727989 0.87834225 0.88101604 0.87782205 0.87616511 0.88187919
|
|
0.88 0.87634409 0.88069705 0.88015979]
|
|
|
|
mean value: 0.8779705460521812
|
|
|
|
key: test_recall
|
|
value: [0.96052632 0.89473684 0.90789474 0.93421053 0.94805195 0.96103896
|
|
0.94805195 0.96103896 0.96052632 0.93421053]
|
|
|
|
mean value: 0.9410287081339713
|
|
|
|
key: train_recall
|
|
value: [0.95930233 0.95494186 0.95784884 0.96075581 0.95778748 0.95633188
|
|
0.96069869 0.94905386 0.95494186 0.96075581]
|
|
|
|
mean value: 0.9572418418469246
|
|
|
|
key: test_roc_auc
|
|
value: [0.86338004 0.90191388 0.8825188 0.9086637 0.89507861 0.90815106
|
|
0.9082365 0.90815106 0.85526316 0.90789474]
|
|
|
|
mean value: 0.8939251537935748
|
|
|
|
key: train_roc_auc
|
|
value: [0.90614316 0.91124094 0.91415004 0.91342012 0.91130653 0.91421245
|
|
0.91494237 0.90766646 0.9127907 0.91497093]
|
|
|
|
mean value: 0.9120843700280965
|
|
|
|
key: test_jcc
|
|
value: [0.77659574 0.81927711 0.79310345 0.83529412 0.82022472 0.84090909
|
|
0.83908046 0.84090909 0.76842105 0.83529412]
|
|
|
|
mean value: 0.8169108950005565
|
|
|
|
key: train_jcc
|
|
value: [0.8365019 0.84338896 0.84813385 0.8474359 0.84358974 0.84774194
|
|
0.84942085 0.83697047 0.84555985 0.8496144 ]
|
|
|
|
mean value: 0.8448357851824928
|
|
|
|
MCC on Blind test: 0.53
|
|
|
|
Accuracy on Blind test: 0.8
|
|
|
|
Model_name: Logistic Regression
|
|
Model func: LogisticRegression(random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'mcsm_ppi2_affinity',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=169)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegression(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.04955769 0.04904199 0.05234981 0.05163145 0.04970956 0.05562735
|
|
0.0570662 0.04915619 0.05621696 0.05613136]
|
|
|
|
mean value: 0.05264885425567627
|
|
|
|
key: score_time
|
|
value: [0.01261473 0.01321888 0.01325297 0.01331329 0.01268411 0.01346564
|
|
0.01553273 0.01519084 0.01355028 0.01261425]
|
|
|
|
mean value: 0.01354377269744873
|
|
|
|
key: test_mcc
|
|
value: [0.65416089 0.81698565 0.76582319 0.79533886 0.71446184 0.76875613
|
|
0.85797782 0.80396515 0.68109062 0.77800131]
|
|
|
|
mean value: 0.7636561455141508
|
|
|
|
key: train_mcc
|
|
value: [0.8055112 0.78327149 0.79784482 0.7909309 0.80828773 0.7937117
|
|
0.77330527 0.79816189 0.78802463 0.78912553]
|
|
|
|
mean value: 0.7928175172421438
|
|
|
|
key: test_accuracy
|
|
value: [0.82352941 0.90849673 0.88235294 0.89542484 0.85620915 0.88235294
|
|
0.92810458 0.90196078 0.83552632 0.88815789]
|
|
|
|
mean value: 0.8802115583075335
|
|
|
|
key: train_accuracy
|
|
value: [0.90181818 0.89090909 0.89818182 0.89454545 0.90327273 0.896
|
|
0.88581818 0.89818182 0.8931686 0.89389535]
|
|
|
|
mean value: 0.8955791226215645
|
|
|
|
key: test_fscore
|
|
value: [0.83435583 0.90789474 0.88461538 0.9 0.8625 0.88888889
|
|
0.93081761 0.90322581 0.84848485 0.89171975]
|
|
|
|
mean value: 0.8852502848789522
|
|
|
|
key: train_fscore
|
|
value: [0.90513001 0.89421721 0.90126939 0.8981026 0.90627202 0.89922481
|
|
0.8893587 0.90140845 0.89655172 0.89689266]
|
|
|
|
mean value: 0.898842757028844
|
|
|
|
key: test_precision
|
|
value: [0.7816092 0.90789474 0.8625 0.85714286 0.8313253 0.84705882
|
|
0.90243902 0.8974359 0.78651685 0.86419753]
|
|
|
|
mean value: 0.8538120220744415
|
|
|
|
key: train_precision
|
|
value: [0.87619048 0.86849315 0.87534247 0.86938776 0.8784153 0.8715847
|
|
0.86202186 0.87312415 0.86903138 0.87225275]
|
|
|
|
mean value: 0.8715843978145863
|
|
|
|
key: test_recall
|
|
value: [0.89473684 0.90789474 0.90789474 0.94736842 0.8961039 0.93506494
|
|
0.96103896 0.90909091 0.92105263 0.92105263]
|
|
|
|
mean value: 0.9201298701298701
|
|
|
|
key: train_recall
|
|
value: [0.93604651 0.92151163 0.92877907 0.92877907 0.93595342 0.9286754
|
|
0.91848617 0.93158661 0.92587209 0.92296512]
|
|
|
|
mean value: 0.9278655089536576
|
|
|
|
key: test_roc_auc
|
|
value: [0.8239918 0.90849282 0.8825188 0.89576213 0.85594668 0.88200615
|
|
0.9278879 0.90191388 0.83552632 0.88815789]
|
|
|
|
mean value: 0.8802204374572795
|
|
|
|
key: train_roc_auc
|
|
value: [0.90179327 0.89088682 0.89815955 0.89452054 0.90329648 0.89602375
|
|
0.88584192 0.89820609 0.8931686 0.89389535]
|
|
|
|
mean value: 0.8955792373311668
|
|
|
|
key: test_jcc
|
|
value: [0.71578947 0.8313253 0.79310345 0.81818182 0.75824176 0.8
|
|
0.87058824 0.82352941 0.73684211 0.8045977 ]
|
|
|
|
mean value: 0.7952199253059875
|
|
|
|
key: train_jcc
|
|
value: [0.8267009 0.80867347 0.82028241 0.81505102 0.82860825 0.81690141
|
|
0.80076142 0.82051282 0.8125 0.81306018]
|
|
|
|
mean value: 0.8163051878697666
|
|
|
|
MCC on Blind test: 0.6
|
|
|
|
Accuracy on Blind test: 0.81
|
|
|
|
Model_name: Logistic RegressionCV
|
|
Model func: LogisticRegressionCV(random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'mcsm_ppi2_affinity',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=169)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegressionCV(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.28453064 1.11170769 1.29114819 1.11578321 1.26954412 1.10291767
|
|
1.32249784 1.15211535 1.2166903 1.27844119]
|
|
|
|
mean value: 1.2145376205444336
|
|
|
|
key: score_time
|
|
value: [0.01480889 0.01508808 0.01512408 0.01507807 0.01351333 0.01501322
|
|
0.01514792 0.02332616 0.01545596 0.01541638]
|
|
|
|
mean value: 0.01579720973968506
|
|
|
|
key: test_mcc
|
|
value: [0.7009596 0.79084074 0.76582319 0.78337127 0.72691691 0.7566957
|
|
0.85972834 0.79185327 0.72881252 0.79056942]
|
|
|
|
mean value: 0.7695570951659244
|
|
|
|
key: train_mcc
|
|
value: [0.81936216 0.80175952 0.81380396 0.81201589 0.82381176 0.80620419
|
|
0.80025899 0.80759966 0.80785513 0.80039657]
|
|
|
|
mean value: 0.8093067817221927
|
|
|
|
key: test_accuracy
|
|
value: [0.8496732 0.89542484 0.88235294 0.88888889 0.8627451 0.87581699
|
|
0.92810458 0.89542484 0.86184211 0.89473684]
|
|
|
|
mean value: 0.883501031991744
|
|
|
|
key: train_accuracy
|
|
value: [0.90909091 0.90036364 0.90618182 0.90545455 0.91127273 0.90254545
|
|
0.89963636 0.90327273 0.90334302 0.8997093 ]
|
|
|
|
mean value: 0.9040870507399577
|
|
|
|
key: test_fscore
|
|
value: [0.85350318 0.89473684 0.88461538 0.89440994 0.86792453 0.88343558
|
|
0.93167702 0.89873418 0.86956522 0.8974359 ]
|
|
|
|
mean value: 0.8876037771122127
|
|
|
|
key: train_fscore
|
|
value: [0.91153574 0.90290574 0.9089626 0.90793201 0.91359773 0.90496454
|
|
0.90198864 0.90560681 0.90587403 0.90212766]
|
|
|
|
mean value: 0.9065495497430327
|
|
|
|
key: test_precision
|
|
value: [0.82716049 0.89473684 0.8625 0.84705882 0.84146341 0.8372093
|
|
0.89285714 0.87654321 0.82352941 0.875 ]
|
|
|
|
mean value: 0.8578058640919956
|
|
|
|
key: train_precision
|
|
value: [0.88827586 0.88105118 0.88340192 0.88535912 0.88965517 0.8824343
|
|
0.88072122 0.88365651 0.88275862 0.88088643]
|
|
|
|
mean value: 0.8838200325627028
|
|
|
|
key: test_recall
|
|
value: [0.88157895 0.89473684 0.90789474 0.94736842 0.8961039 0.93506494
|
|
0.97402597 0.92207792 0.92105263 0.92105263]
|
|
|
|
mean value: 0.9200956937799043
|
|
|
|
key: train_recall
|
|
value: [0.93604651 0.92587209 0.93604651 0.93168605 0.93886463 0.9286754
|
|
0.92430859 0.9286754 0.93023256 0.9244186 ]
|
|
|
|
mean value: 0.9304826343048644
|
|
|
|
key: test_roc_auc
|
|
value: [0.84988038 0.89542037 0.8825188 0.88926863 0.86252563 0.8754272
|
|
0.92780246 0.89524949 0.86184211 0.89473684]
|
|
|
|
mean value: 0.8834671907040328
|
|
|
|
key: train_roc_auc
|
|
value: [0.90907129 0.90034507 0.90616008 0.90543545 0.91129278 0.90256444
|
|
0.89965429 0.90329119 0.90334302 0.8997093 ]
|
|
|
|
mean value: 0.9040866930706476
|
|
|
|
key: test_jcc
|
|
value: [0.74444444 0.80952381 0.79310345 0.80898876 0.76666667 0.79120879
|
|
0.87209302 0.81609195 0.76923077 0.81395349]
|
|
|
|
mean value: 0.7985305159046182
|
|
|
|
key: train_jcc
|
|
value: [0.83745124 0.82299742 0.83311772 0.83138781 0.84093872 0.82642487
|
|
0.82147477 0.82749676 0.82794308 0.82170543]
|
|
|
|
mean value: 0.8290937811687371
|
|
|
|
MCC on Blind test: 0.7
|
|
|
|
Accuracy on Blind test: 0.86
|
|
|
|
Model_name: Gaussian NB
|
|
Model func: GaussianNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'mcsm_ppi2_affinity',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=169)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01994014 0.01431274 0.01398349 0.01401997 0.01395249 0.0145154
|
|
0.01382589 0.01382136 0.01351547 0.01353931]
|
|
|
|
mean value: 0.014542627334594726
|
|
|
|
key: score_time
|
|
value: [0.01291323 0.01000357 0.00988102 0.00977659 0.00964975 0.01053786
|
|
0.00961494 0.00956964 0.00955653 0.00958943]
|
|
|
|
mean value: 0.010109257698059083
|
|
|
|
key: test_mcc
|
|
value: [0.35331906 0.6066207 0.4543934 0.58636415 0.42599834 0.45716766
|
|
0.54858429 0.60879266 0.55339859 0.565301 ]
|
|
|
|
mean value: 0.5159939850095177
|
|
|
|
key: train_mcc
|
|
value: [0.55818859 0.5177552 0.52601313 0.52340198 0.53234078 0.52282598
|
|
0.51653888 0.51865034 0.52663644 0.52345697]
|
|
|
|
mean value: 0.5265808284353783
|
|
|
|
key: test_accuracy
|
|
value: [0.67320261 0.79084967 0.7254902 0.79084967 0.70588235 0.7254902
|
|
0.77124183 0.80392157 0.77631579 0.77631579]
|
|
|
|
mean value: 0.7539559683522532
|
|
|
|
key: train_accuracy
|
|
value: [0.77527273 0.75563636 0.75927273 0.75854545 0.76218182 0.75854545
|
|
0.75490909 0.75563636 0.75872093 0.75872093]
|
|
|
|
mean value: 0.7597441860465116
|
|
|
|
key: test_fscore
|
|
value: [0.63235294 0.75384615 0.70422535 0.77464789 0.66666667 0.70422535
|
|
0.75524476 0.8 0.77027027 0.75 ]
|
|
|
|
mean value: 0.7311479378753613
|
|
|
|
key: train_fscore
|
|
value: [0.75534442 0.73501577 0.73750991 0.73858268 0.73944223 0.73899371
|
|
0.73317498 0.73290938 0.73397436 0.73899371]
|
|
|
|
mean value: 0.7383941152463684
|
|
|
|
key: test_precision
|
|
value: [0.71666667 0.90740741 0.75757576 0.83333333 0.77586207 0.76923077
|
|
0.81818182 0.82191781 0.79166667 0.85 ]
|
|
|
|
mean value: 0.8041842296247115
|
|
|
|
key: train_precision
|
|
value: [0.82956522 0.80344828 0.81151832 0.80584192 0.81690141 0.8034188
|
|
0.80381944 0.80735552 0.81785714 0.80479452]
|
|
|
|
mean value: 0.8104520578615847
|
|
|
|
key: test_recall
|
|
value: [0.56578947 0.64473684 0.65789474 0.72368421 0.58441558 0.64935065
|
|
0.7012987 0.77922078 0.75 0.67105263]
|
|
|
|
mean value: 0.6727443609022556
|
|
|
|
key: train_recall
|
|
value: [0.69331395 0.67732558 0.67587209 0.68168605 0.67540029 0.68413392
|
|
0.67394469 0.67103348 0.66569767 0.68313953]
|
|
|
|
mean value: 0.6781547256355573
|
|
|
|
key: test_roc_auc
|
|
value: [0.67250513 0.78990089 0.72505126 0.79041353 0.70668148 0.72599111
|
|
0.77170198 0.80408407 0.77631579 0.77631579]
|
|
|
|
mean value: 0.753896103896104
|
|
|
|
key: train_roc_auc
|
|
value: [0.77533238 0.75569336 0.75933343 0.75860139 0.76211875 0.75849138
|
|
0.75485025 0.75557488 0.75872093 0.75872093]
|
|
|
|
mean value: 0.7597437671371992
|
|
|
|
key: test_jcc
|
|
value: [0.46236559 0.60493827 0.54347826 0.63218391 0.5 0.54347826
|
|
0.60674157 0.66666667 0.62637363 0.6 ]
|
|
|
|
mean value: 0.5786226158861896
|
|
|
|
key: train_jcc
|
|
value: [0.60687023 0.58104738 0.58417085 0.5855181 0.58659924 0.58603491
|
|
0.57875 0.57841907 0.57974684 0.58603491]
|
|
|
|
mean value: 0.5853191541061298
|
|
|
|
MCC on Blind test: 0.35
|
|
|
|
Accuracy on Blind test: 0.73
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'mcsm_ppi2_affinity',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=169)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01481962 0.01946259 0.01959991 0.01946759 0.01943946 0.02149749
|
|
0.02068472 0.02002621 0.02260518 0.0189991 ]
|
|
|
|
mean value: 0.019660186767578126
|
|
|
|
key: score_time
|
|
value: [0.01281857 0.01282787 0.01279497 0.01283956 0.01286054 0.0145731
|
|
0.01526475 0.01289558 0.01289058 0.01337099]
|
|
|
|
mean value: 0.013313651084899902
|
|
|
|
key: test_mcc
|
|
value: [0.4016051 0.58855388 0.56893632 0.64712919 0.49399158 0.55748
|
|
0.6485802 0.62147384 0.60547285 0.63355641]
|
|
|
|
mean value: 0.5766779371513958
|
|
|
|
key: train_mcc
|
|
value: [0.60379941 0.58113568 0.58027589 0.57304947 0.58844547 0.58205905
|
|
0.58920127 0.58692562 0.58724709 0.58712742]
|
|
|
|
mean value: 0.5859266375793112
|
|
|
|
key: test_accuracy
|
|
value: [0.69934641 0.79084967 0.78431373 0.82352941 0.74509804 0.77777778
|
|
0.82352941 0.81045752 0.80263158 0.81578947]
|
|
|
|
mean value: 0.7873323013415893
|
|
|
|
key: train_accuracy
|
|
value: [0.80145455 0.78981818 0.78909091 0.78545455 0.79345455 0.79054545
|
|
0.79418182 0.79272727 0.79287791 0.79287791]
|
|
|
|
mean value: 0.792248308668076
|
|
|
|
key: test_fscore
|
|
value: [0.67605634 0.77142857 0.77852349 0.82352941 0.73103448 0.77027027
|
|
0.81879195 0.80794702 0.80519481 0.80821918]
|
|
|
|
mean value: 0.7790995513636494
|
|
|
|
key: train_fscore
|
|
value: [0.7961165 0.78221552 0.77996965 0.77600607 0.78549849 0.78410795
|
|
0.78833209 0.78490566 0.78522984 0.78555305]
|
|
|
|
mean value: 0.7847934825734109
|
|
|
|
key: test_precision
|
|
value: [0.72727273 0.84375 0.79452055 0.81818182 0.77941176 0.8028169
|
|
0.84722222 0.82432432 0.79487179 0.84285714]
|
|
|
|
mean value: 0.8075229243789568
|
|
|
|
key: train_precision
|
|
value: [0.8187404 0.81220657 0.81587302 0.81240064 0.81632653 0.80834621
|
|
0.81076923 0.81504702 0.81533646 0.81435257]
|
|
|
|
mean value: 0.8139398657902496
|
|
|
|
key: test_recall
|
|
value: [0.63157895 0.71052632 0.76315789 0.82894737 0.68831169 0.74025974
|
|
0.79220779 0.79220779 0.81578947 0.77631579]
|
|
|
|
mean value: 0.7539302802460697
|
|
|
|
key: train_recall
|
|
value: [0.7747093 0.75436047 0.74709302 0.74273256 0.75691412 0.76128093
|
|
0.76710335 0.75691412 0.75726744 0.75872093]
|
|
|
|
mean value: 0.7577096239125284
|
|
|
|
key: test_roc_auc
|
|
value: [0.69890636 0.79032809 0.78417635 0.82356459 0.74547163 0.77802461
|
|
0.82373548 0.81057758 0.80263158 0.81578947]
|
|
|
|
mean value: 0.7873205741626794
|
|
|
|
key: train_roc_auc
|
|
value: [0.80147401 0.78984399 0.78912148 0.78548564 0.79342799 0.79052419
|
|
0.79416214 0.79270125 0.79287791 0.79287791]
|
|
|
|
mean value: 0.7922496487932027
|
|
|
|
key: test_jcc
|
|
value: [0.5106383 0.62790698 0.63736264 0.7 0.57608696 0.62637363
|
|
0.69318182 0.67777778 0.67391304 0.67816092]
|
|
|
|
mean value: 0.6401402053852616
|
|
|
|
key: train_jcc
|
|
value: [0.66129032 0.64232673 0.63930348 0.63399504 0.64676617 0.64488286
|
|
0.65061728 0.64596273 0.64640199 0.64684015]
|
|
|
|
mean value: 0.6458386755562313
|
|
|
|
MCC on Blind test: 0.47
|
|
|
|
Accuracy on Blind test: 0.79
|
|
|
|
Model_name: K-Nearest Neighbors
|
|
Model func: KNeighborsClassifier()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'mcsm_ppi2_affinity',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=169)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', KNeighborsClassifier())])
|
|
|
|
key: fit_time
|
|
value: [0.01773548 0.01330256 0.01375794 0.01433539 0.01339078 0.01321316
|
|
0.01307821 0.01311183 0.0127871 0.01467037]
|
|
|
|
mean value: 0.013938283920288086
|
|
|
|
key: score_time
|
|
value: [0.03969884 0.01717877 0.01783061 0.01724839 0.02037358 0.0177145
|
|
0.01740003 0.01754093 0.01748443 0.01792002]
|
|
|
|
mean value: 0.020039010047912597
|
|
|
|
key: test_mcc
|
|
value: [0.46940184 0.64712919 0.76499745 0.6255278 0.73875886 0.6006267
|
|
0.64745159 0.699419 0.59222009 0.69021144]
|
|
|
|
mean value: 0.6475743958019158
|
|
|
|
key: train_mcc
|
|
value: [0.78479904 0.741185 0.75987931 0.77331807 0.76984424 0.76966348
|
|
0.76230012 0.75645369 0.78049147 0.76875613]
|
|
|
|
mean value: 0.7666690525599327
|
|
|
|
key: test_accuracy
|
|
value: [0.73202614 0.82352941 0.88235294 0.81045752 0.86928105 0.79738562
|
|
0.82352941 0.8496732 0.78947368 0.84210526]
|
|
|
|
mean value: 0.8219814241486068
|
|
|
|
key: train_accuracy
|
|
value: [0.89090909 0.86909091 0.87854545 0.88509091 0.88363636 0.88363636
|
|
0.87927273 0.87636364 0.88880814 0.88226744]
|
|
|
|
mean value: 0.8817621035940804
|
|
|
|
key: test_fscore
|
|
value: [0.74846626 0.82352941 0.88311688 0.81987578 0.87179487 0.81212121
|
|
0.82802548 0.8496732 0.80952381 0.85185185]
|
|
|
|
mean value: 0.8297978754560947
|
|
|
|
key: train_fscore
|
|
value: [0.89554318 0.87482615 0.88362369 0.89012517 0.88811189 0.88795518
|
|
0.88488211 0.88210818 0.89337979 0.88812155]
|
|
|
|
mean value: 0.8868676889495757
|
|
|
|
key: test_precision
|
|
value: [0.70114943 0.81818182 0.87179487 0.77647059 0.86075949 0.76136364
|
|
0.8125 0.85526316 0.73913043 0.80232558]
|
|
|
|
mean value: 0.7998939007606557
|
|
|
|
key: train_precision
|
|
value: [0.85962567 0.83866667 0.84872825 0.85333333 0.85464334 0.85560054
|
|
0.84503311 0.84238411 0.85809906 0.84605263]
|
|
|
|
mean value: 0.8502166705438856
|
|
|
|
key: test_recall
|
|
value: [0.80263158 0.82894737 0.89473684 0.86842105 0.88311688 0.87012987
|
|
0.84415584 0.84415584 0.89473684 0.90789474]
|
|
|
|
mean value: 0.8638926862611073
|
|
|
|
key: train_recall
|
|
value: [0.93459302 0.91424419 0.92151163 0.93023256 0.92430859 0.92285298
|
|
0.9286754 0.92576419 0.93168605 0.93459302]
|
|
|
|
mean value: 0.9268461629599539
|
|
|
|
key: test_roc_auc
|
|
value: [0.73248462 0.82356459 0.88243336 0.8108339 0.86919002 0.79690704
|
|
0.82339371 0.8497095 0.78947368 0.84210526]
|
|
|
|
mean value: 0.8220095693779904
|
|
|
|
key: train_roc_auc
|
|
value: [0.8908773 0.86905805 0.87851418 0.88505805 0.88366592 0.88366486
|
|
0.87930863 0.87639954 0.88880814 0.88226744]
|
|
|
|
mean value: 0.8817622118411699
|
|
|
|
key: test_jcc
|
|
value: [0.59803922 0.7 0.79069767 0.69473684 0.77272727 0.68367347
|
|
0.70652174 0.73863636 0.68 0.74193548]
|
|
|
|
mean value: 0.7106968060962936
|
|
|
|
key: train_jcc
|
|
value: [0.81084489 0.77750309 0.79151061 0.80200501 0.79874214 0.79848866
|
|
0.79353234 0.78908189 0.80730479 0.79875776]
|
|
|
|
mean value: 0.7967771184699708
|
|
|
|
MCC on Blind test: 0.39
|
|
|
|
Accuracy on Blind test: 0.72
|
|
|
|
Model_name: SVM
|
|
Model func: SVC(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'mcsm_ppi2_affinity',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=169)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SVC(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.10233808 0.10575843 0.10373473 0.10605693 0.10498071 0.10569978
|
|
0.1071682 0.10758281 0.08358073 0.08797741]
|
|
|
|
mean value: 0.10148777961730956
|
|
|
|
key: score_time
|
|
value: [0.03032064 0.03100991 0.03179431 0.03213096 0.03182483 0.03185129
|
|
0.03272891 0.03121138 0.02697086 0.02723646]
|
|
|
|
mean value: 0.030707955360412598
|
|
|
|
key: test_mcc
|
|
value: [0.55377262 0.76582319 0.7286962 0.76297229 0.70210289 0.732851
|
|
0.80939231 0.70070473 0.68451751 0.79388419]
|
|
|
|
mean value: 0.7234716918479318
|
|
|
|
key: train_mcc
|
|
value: [0.77064854 0.75945098 0.75893753 0.76481527 0.78585474 0.76215877
|
|
0.75256889 0.76060384 0.76842553 0.7639028 ]
|
|
|
|
mean value: 0.7647366884034111
|
|
|
|
key: test_accuracy
|
|
value: [0.77124183 0.88235294 0.8627451 0.87581699 0.8496732 0.8627451
|
|
0.90196078 0.8496732 0.83552632 0.89473684]
|
|
|
|
mean value: 0.8586472308221534
|
|
|
|
key: train_accuracy
|
|
value: [0.88290909 0.87709091 0.87709091 0.87927273 0.89018182 0.87854545
|
|
0.87345455 0.87709091 0.8815407 0.87936047]
|
|
|
|
mean value: 0.8796537526427062
|
|
|
|
key: test_fscore
|
|
value: [0.79041916 0.88461538 0.86792453 0.88484848 0.85714286 0.87272727
|
|
0.90797546 0.85534591 0.8502994 0.9 ]
|
|
|
|
mean value: 0.8671298462582523
|
|
|
|
key: train_fscore
|
|
value: [0.88919477 0.88400824 0.88368892 0.88661202 0.89621993 0.88506538
|
|
0.88065844 0.88448394 0.88812629 0.88598901]
|
|
|
|
mean value: 0.8864046931328189
|
|
|
|
key: test_precision
|
|
value: [0.72527473 0.8625 0.8313253 0.82022472 0.82142857 0.81818182
|
|
0.86046512 0.82926829 0.78021978 0.85714286]
|
|
|
|
mean value: 0.8206031181515692
|
|
|
|
key: train_precision
|
|
value: [0.84444444 0.83745124 0.83921569 0.83634021 0.84895833 0.83942559
|
|
0.83268482 0.83376289 0.84135241 0.83984375]
|
|
|
|
mean value: 0.8393479360298207
|
|
|
|
key: test_recall
|
|
value: [0.86842105 0.90789474 0.90789474 0.96052632 0.8961039 0.93506494
|
|
0.96103896 0.88311688 0.93421053 0.94736842]
|
|
|
|
mean value: 0.920164046479836
|
|
|
|
key: train_recall
|
|
value: [0.93895349 0.93604651 0.93313953 0.94331395 0.94905386 0.93595342
|
|
0.93449782 0.94177584 0.94040698 0.9375 ]
|
|
|
|
mean value: 0.9390641396702888
|
|
|
|
key: test_roc_auc
|
|
value: [0.77187286 0.8825188 0.86303828 0.87636705 0.84936774 0.86226931
|
|
0.90157211 0.84945318 0.83552632 0.89473684]
|
|
|
|
mean value: 0.8586722488038276
|
|
|
|
key: train_roc_auc
|
|
value: [0.8828683 0.877048 0.87705012 0.87922612 0.8902246 0.87858718
|
|
0.87349891 0.87713792 0.8815407 0.87936047]
|
|
|
|
mean value: 0.8796542305609154
|
|
|
|
key: test_jcc
|
|
value: [0.65346535 0.79310345 0.76666667 0.79347826 0.75 0.77419355
|
|
0.83146067 0.74725275 0.73958333 0.81818182]
|
|
|
|
mean value: 0.7667385843659047
|
|
|
|
key: train_jcc
|
|
value: [0.80049566 0.79212792 0.79161529 0.79631902 0.81195517 0.79382716
|
|
0.78676471 0.79289216 0.79876543 0.79531443]
|
|
|
|
mean value: 0.7960076942490064
|
|
|
|
MCC on Blind test: 0.7
|
|
|
|
Accuracy on Blind test: 0.86
|
|
|
|
Model_name: MLP
|
|
Model func: MLPClassifier(max_iter=500, random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'mcsm_ppi2_affinity',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=169)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MLPClassifier(max_iter=500, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [3.42435646 3.32703495 3.87789416 3.76856327 3.39414501 3.3016696
|
|
4.98186564 5.03846812 3.40142083 4.48261976]
|
|
|
|
mean value: 3.8998037815093993
|
|
|
|
key: score_time
|
|
value: [0.01347184 0.01309609 0.01320553 0.01314592 0.02440691 0.01321507
|
|
0.01536345 0.01597309 0.01372814 0.01349449]
|
|
|
|
mean value: 0.014910054206848145
|
|
|
|
key: test_mcc
|
|
value: [0.679029 0.88613163 0.85055077 0.80967851 0.77781577 0.84767786
|
|
0.87042236 0.85682563 0.79639781 0.81117308]
|
|
|
|
mean value: 0.8185702422957642
|
|
|
|
key: train_mcc
|
|
value: [0.87814104 0.91340542 0.94487049 0.93258269 0.93168513 0.95345846
|
|
0.96953732 0.97528208 0.95058541 0.92752052]
|
|
|
|
mean value: 0.9377068557422232
|
|
|
|
key: test_accuracy
|
|
value: [0.82352941 0.94117647 0.92156863 0.90196078 0.88888889 0.92156863
|
|
0.93464052 0.92810458 0.89473684 0.90131579]
|
|
|
|
mean value: 0.9057490540075679
|
|
|
|
key: train_accuracy
|
|
value: [0.93672727 0.95563636 0.97236364 0.96581818 0.96581818 0.97672727
|
|
0.98472727 0.98763636 0.9752907 0.96293605]
|
|
|
|
mean value: 0.9683681289640592
|
|
|
|
key: test_fscore
|
|
value: [0.84571429 0.94339623 0.92592593 0.9068323 0.89032258 0.92592593
|
|
0.93670886 0.92993631 0.90123457 0.90797546]
|
|
|
|
mean value: 0.9113972437278951
|
|
|
|
key: train_fscore
|
|
value: [0.9398756 0.95713282 0.97262248 0.96659559 0.96596669 0.97667638
|
|
0.98461538 0.98765432 0.97525473 0.96400847]
|
|
|
|
mean value: 0.9690402475092297
|
|
|
|
key: test_precision
|
|
value: [0.74747475 0.90361446 0.87209302 0.85882353 0.88461538 0.88235294
|
|
0.91358025 0.9125 0.84883721 0.85057471]
|
|
|
|
mean value: 0.867446625262509
|
|
|
|
key: train_precision
|
|
value: [0.89591568 0.92653061 0.96428571 0.945758 0.9610951 0.97810219
|
|
0.99115044 0.98550725 0.97667638 0.93689986]
|
|
|
|
mean value: 0.9561921229439048
|
|
|
|
key: test_recall
|
|
value: [0.97368421 0.98684211 0.98684211 0.96052632 0.8961039 0.97402597
|
|
0.96103896 0.94805195 0.96052632 0.97368421]
|
|
|
|
mean value: 0.9621326042378674
|
|
|
|
key: train_recall
|
|
value: [0.98837209 0.98982558 0.98110465 0.98837209 0.97088792 0.97525473
|
|
0.97816594 0.98981077 0.97383721 0.99273256]
|
|
|
|
mean value: 0.9828363545580718
|
|
|
|
key: test_roc_auc
|
|
value: [0.82450444 0.941473 0.92199248 0.90234108 0.88884142 0.92122351
|
|
0.93446685 0.92797334 0.89473684 0.90131579]
|
|
|
|
mean value: 0.9058868762816131
|
|
|
|
key: train_roc_auc
|
|
value: [0.93668969 0.95561148 0.97235727 0.96580177 0.96582187 0.9767262
|
|
0.9847225 0.98763794 0.9752907 0.96293605]
|
|
|
|
mean value: 0.9683595469009173
|
|
|
|
key: test_jcc
|
|
value: [0.73267327 0.89285714 0.86206897 0.82954545 0.80232558 0.86206897
|
|
0.88095238 0.86904762 0.82022472 0.83146067]
|
|
|
|
mean value: 0.8383224770417589
|
|
|
|
key: train_jcc
|
|
value: [0.88657106 0.91778976 0.94670407 0.93535076 0.93417367 0.95441595
|
|
0.96969697 0.97560976 0.95170455 0.93051771]
|
|
|
|
mean value: 0.9402534243634338
|
|
|
|
MCC on Blind test: 0.61
|
|
|
|
Accuracy on Blind test: 0.84
|
|
|
|
Model_name: Decision Tree
|
|
Model func: DecisionTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'mcsm_ppi2_affinity',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=169)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', DecisionTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.07970905 0.06549978 0.0600791 0.06302691 0.0628171 0.07530284
|
|
0.06135464 0.06667185 0.06252265 0.0646894 ]
|
|
|
|
mean value: 0.06616733074188233
|
|
|
|
key: score_time
|
|
value: [0.00958228 0.00950241 0.00941229 0.00940776 0.00999832 0.00944257
|
|
0.00988722 0.00979042 0.00971103 0.00971794]
|
|
|
|
mean value: 0.009645223617553711
|
|
|
|
key: test_mcc
|
|
value: [0.88305705 0.89668196 0.90042249 0.86959495 0.93471203 0.87398511
|
|
0.89813651 0.88843722 0.89597867 0.91177042]
|
|
|
|
mean value: 0.8952776407974935
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.94117647 0.94771242 0.94771242 0.93464052 0.96732026 0.93464052
|
|
0.94771242 0.94117647 0.94736842 0.95394737]
|
|
|
|
mean value: 0.9463407292741658
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.94193548 0.94871795 0.95 0.93506494 0.96774194 0.9382716
|
|
0.95 0.94478528 0.94871795 0.95597484]
|
|
|
|
mean value: 0.9481209975634859
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.92405063 0.925 0.9047619 0.92307692 0.96153846 0.89411765
|
|
0.91566265 0.89534884 0.925 0.91566265]
|
|
|
|
mean value: 0.9184219707761627
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.96052632 0.97368421 1. 0.94736842 0.97402597 0.98701299
|
|
0.98701299 1. 0.97368421 1. ]
|
|
|
|
mean value: 0.9803315105946685
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.94130212 0.94788107 0.94805195 0.93472317 0.96727614 0.93429597
|
|
0.94745386 0.94078947 0.94736842 0.95394737]
|
|
|
|
mean value: 0.946308954203691
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.8902439 0.90243902 0.9047619 0.87804878 0.9375 0.88372093
|
|
0.9047619 0.89534884 0.90243902 0.91566265]
|
|
|
|
mean value: 0.9014926959275397
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.56
|
|
|
|
Accuracy on Blind test: 0.83
|
|
|
|
Model_name: Extra Trees
|
|
Model func: ExtraTreesClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'mcsm_ppi2_affinity',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=169)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreesClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.22882962 0.22922802 0.22770691 0.2276063 0.22846127 0.23116493
|
|
0.2195704 0.22250962 0.21304131 0.21495366]
|
|
|
|
mean value: 0.22430720329284667
|
|
|
|
key: score_time
|
|
value: [0.02048516 0.02189183 0.02146745 0.02119708 0.02161193 0.02104592
|
|
0.02112818 0.01984119 0.02014971 0.02019882]
|
|
|
|
mean value: 0.0209017276763916
|
|
|
|
key: test_mcc
|
|
value: [0.8319081 0.92285372 0.89823836 0.89574433 0.92280176 0.86208891
|
|
0.92186711 0.87189727 0.8183437 0.93623886]
|
|
|
|
mean value: 0.8881982124273637
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.91503268 0.96078431 0.94771242 0.94771242 0.96078431 0.92810458
|
|
0.96078431 0.93464052 0.90789474 0.96710526]
|
|
|
|
mean value: 0.9430555555555555
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.91719745 0.96153846 0.94936709 0.94805195 0.96202532 0.93251534
|
|
0.96153846 0.9375 0.91139241 0.96815287]
|
|
|
|
mean value: 0.9449279337150104
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.88888889 0.9375 0.91463415 0.93589744 0.9382716 0.88372093
|
|
0.94936709 0.90361446 0.87804878 0.9382716 ]
|
|
|
|
mean value: 0.9168214938163615
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.94736842 0.98684211 0.98684211 0.96052632 0.98701299 0.98701299
|
|
0.97402597 0.97402597 0.94736842 1. ]
|
|
|
|
mean value: 0.9751025290498975
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.91524265 0.96095352 0.94796651 0.94779563 0.96061176 0.92771702
|
|
0.9606972 0.93438141 0.90789474 0.96710526]
|
|
|
|
mean value: 0.9430365686944634
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.84705882 0.92592593 0.90361446 0.90123457 0.92682927 0.87356322
|
|
0.92592593 0.88235294 0.8372093 0.9382716 ]
|
|
|
|
mean value: 0.8961986036237635
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.44
|
|
|
|
Accuracy on Blind test: 0.79
|
|
|
|
Model_name: Extra Tree
|
|
Model func: ExtraTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'mcsm_ppi2_affinity',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=169)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01631427 0.01626396 0.0164547 0.01587343 0.0148356 0.01484942
|
|
0.01600504 0.01600504 0.01513362 0.01451206]
|
|
|
|
mean value: 0.015624713897705079
|
|
|
|
key: score_time
|
|
value: [0.0095787 0.01043653 0.01051712 0.00969219 0.01043558 0.01000738
|
|
0.0094974 0.01046777 0.0094893 0.00953722]
|
|
|
|
mean value: 0.009965920448303222
|
|
|
|
key: test_mcc
|
|
value: [0.74443747 0.83598575 0.81252579 0.80095083 0.81558518 0.89813651
|
|
0.87398511 0.84767786 0.71136133 0.82034699]
|
|
|
|
mean value: 0.8160992827519362
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.8627451 0.91503268 0.90196078 0.89542484 0.90196078 0.94771242
|
|
0.93464052 0.92156863 0.84868421 0.90789474]
|
|
|
|
mean value: 0.9037624699002408
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.87573964 0.91925466 0.90797546 0.90243902 0.91017964 0.95
|
|
0.9382716 0.92592593 0.86227545 0.9125 ]
|
|
|
|
mean value: 0.9104561408553007
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.79569892 0.87058824 0.85057471 0.84090909 0.84444444 0.91566265
|
|
0.89411765 0.88235294 0.79120879 0.86904762]
|
|
|
|
mean value: 0.8554605057116628
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.97368421 0.97368421 0.97368421 0.97368421 0.98701299 0.98701299
|
|
0.98701299 0.97402597 0.94736842 0.96052632]
|
|
|
|
mean value: 0.9737696514012304
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.86346548 0.91541353 0.90242652 0.89593301 0.90140123 0.94745386
|
|
0.93429597 0.92122351 0.84868421 0.90789474]
|
|
|
|
mean value: 0.9038192071086808
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.77894737 0.85057471 0.83146067 0.82222222 0.83516484 0.9047619
|
|
0.88372093 0.86206897 0.75789474 0.83908046]
|
|
|
|
mean value: 0.8365896809733016
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.5
|
|
|
|
Accuracy on Blind test: 0.79
|
|
|
|
Model_name: Random Forest
|
|
Model func: RandomForestClassifier(n_estimators=1000, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'mcsm_ppi2_affinity',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=169)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(n_estimators=1000, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [3.72849846 3.66784 3.71179175 3.77300262 3.71986294 3.68993902
|
|
3.75254583 3.72217226 3.73778081 3.73096752]
|
|
|
|
mean value: 3.7234401226043703
|
|
|
|
key: score_time
|
|
value: [0.10509872 0.10523677 0.11515713 0.11611629 0.10531783 0.11526346
|
|
0.10578632 0.11265159 0.10944152 0.10565376]
|
|
|
|
mean value: 0.10957233905792237
|
|
|
|
key: test_mcc
|
|
value: [0.85989239 0.94804308 0.89823836 0.92156528 0.96086141 0.91227016
|
|
0.9353409 0.92437574 0.87114007 0.91177042]
|
|
|
|
mean value: 0.9143497801246042
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.92810458 0.97385621 0.94771242 0.96078431 0.98039216 0.95424837
|
|
0.96732026 0.96078431 0.93421053 0.95394737]
|
|
|
|
mean value: 0.9561360509115927
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.93081761 0.97402597 0.94936709 0.96052632 0.98064516 0.95652174
|
|
0.96815287 0.9625 0.93670886 0.95597484]
|
|
|
|
mean value: 0.957524045867552
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.89156627 0.96153846 0.91463415 0.96052632 0.97435897 0.91666667
|
|
0.95 0.92771084 0.90243902 0.91566265]
|
|
|
|
mean value: 0.9315103348121428
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.97368421 0.98684211 0.98684211 0.96052632 0.98701299 1.
|
|
0.98701299 1. 0.97368421 1. ]
|
|
|
|
mean value: 0.9855604921394395
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.92840055 0.97394053 0.94796651 0.96078264 0.9803486 0.95394737
|
|
0.9671907 0.96052632 0.93421053 0.95394737]
|
|
|
|
mean value: 0.9561261107313739
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.87058824 0.94936709 0.90361446 0.92405063 0.96202532 0.91666667
|
|
0.9382716 0.92771084 0.88095238 0.91566265]
|
|
|
|
mean value: 0.9188909877633349
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.67
|
|
|
|
Accuracy on Blind test: 0.86
|
|
|
|
Model_name: Random Forest2
|
|
Model func: RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'mcsm_ppi2_affinity',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC0...05', 'ZHAC000106'],
|
|
dtype='object', length=169)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10,
|
|
oob_score=True, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.39117789 1.34858465 1.38617587 1.3380537 1.32995343 1.3010633
|
|
1.32384777 1.34226727 1.31952024 1.36302686]
|
|
|
|
mean value: 1.3443670988082885
|
|
|
|
key: score_time
|
|
value: [0.29393315 0.21935678 0.27440143 0.29888606 0.28548169 0.30194235
|
|
0.28672743 0.29335928 0.26590633 0.286901 ]
|
|
|
|
mean value: 0.28068954944610597
|
|
|
|
key: test_mcc
|
|
value: [0.8319081 0.88243336 0.87051021 0.90921537 0.88241328 0.90029235
|
|
0.94802543 0.88299739 0.88534229 0.92233098]
|
|
|
|
mean value: 0.8915468752258947
|
|
|
|
key: train_mcc
|
|
value: [0.9434406 0.94778006 0.93910289 0.9422994 0.93939155 0.9465444
|
|
0.9409875 0.94647215 0.94225868 0.94525083]
|
|
|
|
mean value: 0.9433528055064919
|
|
|
|
key: test_accuracy
|
|
value: [0.91503268 0.94117647 0.93464052 0.95424837 0.94117647 0.94771242
|
|
0.97385621 0.94117647 0.94078947 0.96052632]
|
|
|
|
mean value: 0.9450335397316821
|
|
|
|
key: train_accuracy
|
|
value: [0.97163636 0.97381818 0.96945455 0.97090909 0.96945455 0.97309091
|
|
0.97018182 0.97309091 0.97093023 0.97238372]
|
|
|
|
mean value: 0.9714950317124735
|
|
|
|
key: test_fscore
|
|
value: [0.91719745 0.94117647 0.93589744 0.95483871 0.94193548 0.95061728
|
|
0.97435897 0.94267516 0.94339623 0.96153846]
|
|
|
|
mean value: 0.9463631657762174
|
|
|
|
key: train_fscore
|
|
value: [0.97192225 0.9740634 0.96978417 0.9713877 0.96991404 0.97343862
|
|
0.97069335 0.97340043 0.9713467 0.97281831]
|
|
|
|
mean value: 0.9718768978446657
|
|
|
|
key: test_precision
|
|
value: [0.88888889 0.93506494 0.9125 0.93670886 0.93589744 0.90588235
|
|
0.96202532 0.925 0.90361446 0.9375 ]
|
|
|
|
mean value: 0.9243082247838952
|
|
|
|
key: train_precision
|
|
value: [0.96291013 0.96571429 0.96011396 0.95633803 0.95486601 0.96033994
|
|
0.95365169 0.96164773 0.95762712 0.95774648]
|
|
|
|
mean value: 0.959095536437397
|
|
|
|
key: test_recall
|
|
value: [0.94736842 0.94736842 0.96052632 0.97368421 0.94805195 1.
|
|
0.98701299 0.96103896 0.98684211 0.98684211]
|
|
|
|
mean value: 0.9698735475051264
|
|
|
|
key: train_recall
|
|
value: [0.98110465 0.98255814 0.97965116 0.9869186 0.98544396 0.98689956
|
|
0.98835517 0.98544396 0.98546512 0.98837209]
|
|
|
|
mean value: 0.9850212416641279
|
|
|
|
key: test_roc_auc
|
|
value: [0.91524265 0.94121668 0.93480861 0.95437457 0.94113124 0.94736842
|
|
0.97376965 0.9410458 0.94078947 0.96052632]
|
|
|
|
mean value: 0.9450273410799727
|
|
|
|
key: train_roc_auc
|
|
value: [0.97162947 0.97381182 0.96944712 0.97089744 0.96946617 0.97310094
|
|
0.97019503 0.97309989 0.97093023 0.97238372]
|
|
|
|
mean value: 0.9714961832707085
|
|
|
|
key: test_jcc
|
|
value: [0.84705882 0.88888889 0.87951807 0.91358025 0.8902439 0.90588235
|
|
0.95 0.89156627 0.89285714 0.92592593]
|
|
|
|
mean value: 0.8985521620844548
|
|
|
|
key: train_jcc
|
|
value: [0.94537815 0.9494382 0.94134078 0.94436718 0.94158554 0.94825175
|
|
0.94305556 0.94817927 0.94428969 0.94707521]
|
|
|
|
mean value: 0.945296132575369
|
|
|
|
MCC on Blind test: 0.66
|
|
|
|
Accuracy on Blind test: 0.85
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'mcsm_ppi2_affinity',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=169)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.03125072 0.01916838 0.01938295 0.01927495 0.02168083 0.01942801
|
|
0.01928306 0.01930952 0.01898527 0.01894236]
|
|
|
|
mean value: 0.020670604705810548
|
|
|
|
key: score_time
|
|
value: [0.01347709 0.01279616 0.01302552 0.01292491 0.01287913 0.01277995
|
|
0.01284385 0.01278806 0.01284838 0.01289606]
|
|
|
|
mean value: 0.01292591094970703
|
|
|
|
key: test_mcc
|
|
value: [0.4016051 0.58855388 0.56893632 0.64712919 0.49399158 0.55748
|
|
0.6485802 0.62147384 0.60547285 0.63355641]
|
|
|
|
mean value: 0.5766779371513958
|
|
|
|
key: train_mcc
|
|
value: [0.60379941 0.58113568 0.58027589 0.57304947 0.58844547 0.58205905
|
|
0.58920127 0.58692562 0.58724709 0.58712742]
|
|
|
|
mean value: 0.5859266375793112
|
|
|
|
key: test_accuracy
|
|
value: [0.69934641 0.79084967 0.78431373 0.82352941 0.74509804 0.77777778
|
|
0.82352941 0.81045752 0.80263158 0.81578947]
|
|
|
|
mean value: 0.7873323013415893
|
|
|
|
key: train_accuracy
|
|
value: [0.80145455 0.78981818 0.78909091 0.78545455 0.79345455 0.79054545
|
|
0.79418182 0.79272727 0.79287791 0.79287791]
|
|
|
|
mean value: 0.792248308668076
|
|
|
|
key: test_fscore
|
|
value: [0.67605634 0.77142857 0.77852349 0.82352941 0.73103448 0.77027027
|
|
0.81879195 0.80794702 0.80519481 0.80821918]
|
|
|
|
mean value: 0.7790995513636494
|
|
|
|
key: train_fscore
|
|
value: [0.7961165 0.78221552 0.77996965 0.77600607 0.78549849 0.78410795
|
|
0.78833209 0.78490566 0.78522984 0.78555305]
|
|
|
|
mean value: 0.7847934825734109
|
|
|
|
key: test_precision
|
|
value: [0.72727273 0.84375 0.79452055 0.81818182 0.77941176 0.8028169
|
|
0.84722222 0.82432432 0.79487179 0.84285714]
|
|
|
|
mean value: 0.8075229243789568
|
|
|
|
key: train_precision
|
|
value: [0.8187404 0.81220657 0.81587302 0.81240064 0.81632653 0.80834621
|
|
0.81076923 0.81504702 0.81533646 0.81435257]
|
|
|
|
mean value: 0.8139398657902496
|
|
|
|
key: test_recall
|
|
value: [0.63157895 0.71052632 0.76315789 0.82894737 0.68831169 0.74025974
|
|
0.79220779 0.79220779 0.81578947 0.77631579]
|
|
|
|
mean value: 0.7539302802460697
|
|
|
|
key: train_recall
|
|
value: [0.7747093 0.75436047 0.74709302 0.74273256 0.75691412 0.76128093
|
|
0.76710335 0.75691412 0.75726744 0.75872093]
|
|
|
|
mean value: 0.7577096239125284
|
|
|
|
key: test_roc_auc
|
|
value: [0.69890636 0.79032809 0.78417635 0.82356459 0.74547163 0.77802461
|
|
0.82373548 0.81057758 0.80263158 0.81578947]
|
|
|
|
mean value: 0.7873205741626794
|
|
|
|
key: train_roc_auc
|
|
value: [0.80147401 0.78984399 0.78912148 0.78548564 0.79342799 0.79052419
|
|
0.79416214 0.79270125 0.79287791 0.79287791]
|
|
|
|
mean value: 0.7922496487932027
|
|
|
|
key: test_jcc
|
|
value: [0.5106383 0.62790698 0.63736264 0.7 0.57608696 0.62637363
|
|
0.69318182 0.67777778 0.67391304 0.67816092]
|
|
|
|
mean value: 0.6401402053852616
|
|
|
|
key: train_jcc
|
|
value: [0.66129032 0.64232673 0.63930348 0.63399504 0.64676617 0.64488286
|
|
0.65061728 0.64596273 0.64640199 0.64684015]
|
|
|
|
mean value: 0.6458386755562313
|
|
|
|
MCC on Blind test: 0.47
|
|
|
|
Accuracy on Blind test: 0.79
|
|
|
|
Model_name: XGBoost
|
|
Model func: XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'mcsm_ppi2_affinity',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC0...
|
|
interaction_constraints=None, learning_rate=None,
|
|
max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan,
|
|
monotone_constraints=None, n_estimators=100,
|
|
n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None,
|
|
reg_lambda=None, scale_pos_weight=None,
|
|
subsample=None, tree_method=None,
|
|
use_label_encoder=False,
|
|
validate_parameters=None, verbosity=0))])
|
|
|
|
key: fit_time
|
|
value: [0.37299275 0.1863327 0.22527552 0.1957078 0.20946264 0.21796036
|
|
0.22040057 0.19989061 0.20104051 0.30317235]
|
|
|
|
mean value: 0.2332235813140869
|
|
|
|
key: score_time
|
|
value: [0.0117445 0.01154876 0.01167345 0.0127697 0.01279545 0.01185679
|
|
0.01267433 0.01265073 0.01233268 0.01173425]
|
|
|
|
mean value: 0.01217806339263916
|
|
|
|
key: test_mcc
|
|
value: [0.8580978 0.94804308 0.92445054 0.93471203 0.96151265 0.91227016
|
|
0.92280176 0.92437574 0.92233098 0.92393644]
|
|
|
|
mean value: 0.923253118916282
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.92810458 0.97385621 0.96078431 0.96732026 0.98039216 0.95424837
|
|
0.96078431 0.96078431 0.96052632 0.96052632]
|
|
|
|
mean value: 0.9607327141382869
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.92993631 0.97402597 0.96202532 0.96688742 0.98089172 0.95652174
|
|
0.96202532 0.9625 0.96153846 0.96202532]
|
|
|
|
mean value: 0.9618377566758209
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.90123457 0.96153846 0.92682927 0.97333333 0.9625 0.91666667
|
|
0.9382716 0.92771084 0.9375 0.92682927]
|
|
|
|
mean value: 0.9372414014336827
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.96052632 0.98684211 1. 0.96052632 1. 1.
|
|
0.98701299 1. 0.98684211 1. ]
|
|
|
|
mean value: 0.988174982911825
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.92831511 0.97394053 0.96103896 0.96727614 0.98026316 0.95394737
|
|
0.96061176 0.96052632 0.96052632 0.96052632]
|
|
|
|
mean value: 0.9606971975393028
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.86904762 0.94936709 0.92682927 0.93589744 0.9625 0.91666667
|
|
0.92682927 0.92771084 0.92592593 0.92682927]
|
|
|
|
mean value: 0.9267603384396785
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.65
|
|
|
|
Accuracy on Blind test: 0.86
|
|
|
|
Model_name: LDA
|
|
Model func: LinearDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'mcsm_ppi2_affinity',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=169)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LinearDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.06015706 0.09460282 0.1018455 0.09277034 0.09146309 0.08434844
|
|
0.08717299 0.10056043 0.08586311 0.08778548]
|
|
|
|
mean value: 0.08865692615509033
|
|
|
|
key: score_time
|
|
value: [0.01276851 0.01975513 0.0128932 0.02014208 0.0131669 0.0130353
|
|
0.03918862 0.01591039 0.02436423 0.02613568]
|
|
|
|
mean value: 0.019736003875732423
|
|
|
|
key: test_mcc
|
|
value: [0.65416089 0.79114682 0.74096527 0.8057184 0.75464471 0.73952007
|
|
0.79317956 0.77934127 0.69290233 0.79056942]
|
|
|
|
mean value: 0.7542148735801545
|
|
|
|
key: train_mcc
|
|
value: [0.79846743 0.79727037 0.79952315 0.79491988 0.79111343 0.79592494
|
|
0.78991041 0.79509392 0.8061747 0.79983363]
|
|
|
|
mean value: 0.7968231873002005
|
|
|
|
key: test_accuracy
|
|
value: [0.82352941 0.89542484 0.86928105 0.90196078 0.87581699 0.8627451
|
|
0.89542484 0.88888889 0.84210526 0.89473684]
|
|
|
|
mean value: 0.8749914000687995
|
|
|
|
key: train_accuracy
|
|
value: [0.89818182 0.89745455 0.89890909 0.896 0.89454545 0.89672727
|
|
0.89381818 0.89672727 0.90188953 0.89898256]
|
|
|
|
mean value: 0.8973235729386893
|
|
|
|
key: test_fscore
|
|
value: [0.83435583 0.8961039 0.87341772 0.9044586 0.88198758 0.87573964
|
|
0.9 0.89308176 0.85365854 0.8974359 ]
|
|
|
|
mean value: 0.8810239462207575
|
|
|
|
key: train_fscore
|
|
value: [0.90182328 0.9013296 0.90218156 0.90034843 0.8981026 0.90056022
|
|
0.89761571 0.89985896 0.90552834 0.90231904]
|
|
|
|
mean value: 0.900966775191467
|
|
|
|
key: test_precision
|
|
value: [0.7816092 0.88461538 0.84146341 0.87654321 0.8452381 0.80434783
|
|
0.86746988 0.86585366 0.79545455 0.875 ]
|
|
|
|
mean value: 0.8437595209362628
|
|
|
|
key: train_precision
|
|
value: [0.87127371 0.86909582 0.8744884 0.8647925 0.86820652 0.86774629
|
|
0.86603518 0.87277702 0.8731444 0.87346939]
|
|
|
|
mean value: 0.8701029234584499
|
|
|
|
key: test_recall
|
|
value: [0.89473684 0.90789474 0.90789474 0.93421053 0.92207792 0.96103896
|
|
0.93506494 0.92207792 0.92105263 0.92105263]
|
|
|
|
mean value: 0.9227101845522898
|
|
|
|
key: train_recall
|
|
value: [0.93459302 0.93604651 0.93168605 0.93895349 0.930131 0.93595342
|
|
0.93158661 0.9286754 0.94040698 0.93313953]
|
|
|
|
mean value: 0.9341172015165363
|
|
|
|
key: test_roc_auc
|
|
value: [0.8239918 0.89550581 0.86953178 0.9021702 0.87551265 0.86209843
|
|
0.89516405 0.88867054 0.84210526 0.89473684]
|
|
|
|
mean value: 0.8749487354750513
|
|
|
|
key: train_roc_auc
|
|
value: [0.89815532 0.89742646 0.89888524 0.89596874 0.89457132 0.89675578
|
|
0.89384563 0.89675049 0.90188953 0.89898256]
|
|
|
|
mean value: 0.8973231060221387
|
|
|
|
key: test_jcc
|
|
value: [0.71578947 0.81176471 0.7752809 0.8255814 0.78888889 0.77894737
|
|
0.81818182 0.80681818 0.74468085 0.81395349]
|
|
|
|
mean value: 0.787988707053767
|
|
|
|
key: train_jcc
|
|
value: [0.82120051 0.82038217 0.82179487 0.81875792 0.81505102 0.81910828
|
|
0.81424936 0.81794872 0.82736573 0.82202305]
|
|
|
|
mean value: 0.8197881628429927
|
|
|
|
MCC on Blind test: 0.57
|
|
|
|
Accuracy on Blind test: 0.8
|
|
|
|
Model_name: Multinomial
|
|
Model func: MultinomialNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'mcsm_ppi2_affinity',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=169)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MultinomialNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01789331 0.0185709 0.02602816 0.02372169 0.02248359 0.01925993
|
|
0.01907539 0.01859188 0.01864552 0.01883435]
|
|
|
|
mean value: 0.020310473442077637
|
|
|
|
key: score_time
|
|
value: [0.01262403 0.0127635 0.01286173 0.01281047 0.01262879 0.01322031
|
|
0.01306748 0.01348448 0.01301289 0.01288962]
|
|
|
|
mean value: 0.01293632984161377
|
|
|
|
key: test_mcc
|
|
value: [0.50333397 0.6006267 0.60782638 0.5826209 0.57001111 0.52939166
|
|
0.63397129 0.64706889 0.66366484 0.59339083]
|
|
|
|
mean value: 0.5931906573435123
|
|
|
|
key: train_mcc
|
|
value: [0.6233567 0.60000677 0.58984303 0.60152218 0.61317921 0.59419517
|
|
0.59569489 0.60445049 0.60205207 0.60618213]
|
|
|
|
mean value: 0.6030482656429118
|
|
|
|
key: test_accuracy
|
|
value: [0.75163399 0.79738562 0.80392157 0.79084967 0.78431373 0.76470588
|
|
0.81699346 0.82352941 0.82894737 0.79605263]
|
|
|
|
mean value: 0.7958333333333333
|
|
|
|
key: train_accuracy
|
|
value: [0.81163636 0.8 0.79490909 0.80072727 0.80654545 0.79709091
|
|
0.79781818 0.80218182 0.80087209 0.80305233]
|
|
|
|
mean value: 0.8014833509513742
|
|
|
|
key: test_fscore
|
|
value: [0.74666667 0.78014184 0.80263158 0.79487179 0.77852349 0.76623377
|
|
0.81818182 0.82580645 0.83950617 0.78911565]
|
|
|
|
mean value: 0.7941679229516845
|
|
|
|
key: train_fscore
|
|
value: [0.81025641 0.79970867 0.79416058 0.79941435 0.80469897 0.79620161
|
|
0.79618768 0.80029369 0.79763663 0.8014652 ]
|
|
|
|
mean value: 0.8000023791631959
|
|
|
|
key: test_precision
|
|
value: [0.75675676 0.84615385 0.80263158 0.775 0.80555556 0.76623377
|
|
0.81818182 0.82051282 0.79069767 0.81690141]
|
|
|
|
mean value: 0.7998625225211241
|
|
|
|
key: train_precision
|
|
value: [0.816839 0.80145985 0.79765396 0.80530973 0.81185185 0.79912023
|
|
0.80206795 0.80740741 0.81081081 0.80797637]
|
|
|
|
mean value: 0.8060497160861249
|
|
|
|
key: test_recall
|
|
value: [0.73684211 0.72368421 0.80263158 0.81578947 0.75324675 0.76623377
|
|
0.81818182 0.83116883 0.89473684 0.76315789]
|
|
|
|
mean value: 0.7905673274094327
|
|
|
|
key: train_recall
|
|
value: [0.80377907 0.79796512 0.79069767 0.79360465 0.79767103 0.79330422
|
|
0.79039301 0.79330422 0.78488372 0.79505814]
|
|
|
|
mean value: 0.7940660861175992
|
|
|
|
key: test_roc_auc
|
|
value: [0.75153794 0.79690704 0.80391319 0.79101162 0.78451811 0.76469583
|
|
0.81698565 0.82347915 0.82894737 0.79605263]
|
|
|
|
mean value: 0.7958048530416952
|
|
|
|
key: train_roc_auc
|
|
value: [0.81164208 0.80000148 0.79491216 0.80073246 0.80653901 0.79708816
|
|
0.79781279 0.80217537 0.80087209 0.80305233]
|
|
|
|
mean value: 0.8014827908669307
|
|
|
|
key: test_jcc
|
|
value: [0.59574468 0.63953488 0.67032967 0.65957447 0.63736264 0.62105263
|
|
0.69230769 0.7032967 0.72340426 0.65168539]
|
|
|
|
mean value: 0.6594293016110327
|
|
|
|
key: train_jcc
|
|
value: [0.68103448 0.66626214 0.65859564 0.66585366 0.67321867 0.66140777
|
|
0.66138855 0.66707466 0.66339066 0.66870416]
|
|
|
|
mean value: 0.6666930392893675
|
|
|
|
MCC on Blind test: 0.5
|
|
|
|
Accuracy on Blind test: 0.8
|
|
|
|
Model_name: Passive Aggresive
|
|
Model func: PassiveAggressiveClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'mcsm_ppi2_affinity',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=169)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
PassiveAggressiveClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02874899 0.04127073 0.03305984 0.03164721 0.03479314 0.03873873
|
|
0.03585958 0.03636575 0.03846884 0.03605771]
|
|
|
|
mean value: 0.03550105094909668
|
|
|
|
key: score_time
|
|
value: [0.01310873 0.01315308 0.01326323 0.0129993 0.01311874 0.01312613
|
|
0.01317024 0.01301074 0.0130794 0.01309705]
|
|
|
|
mean value: 0.013112664222717285
|
|
|
|
key: test_mcc
|
|
value: [0.59510808 0.537945 0.6655466 0.78337127 0.48681012 0.46950467
|
|
0.64191059 0.80555346 0.61612324 0.65465367]
|
|
|
|
mean value: 0.6256526677406429
|
|
|
|
key: train_mcc
|
|
value: [0.76902552 0.53400444 0.73824447 0.76851025 0.62256707 0.48542757
|
|
0.64037142 0.7805469 0.72285901 0.69872249]
|
|
|
|
mean value: 0.6760279148410566
|
|
|
|
key: test_accuracy
|
|
value: [0.79738562 0.73202614 0.83006536 0.88888889 0.71895425 0.69934641
|
|
0.79738562 0.90196078 0.78947368 0.81578947]
|
|
|
|
mean value: 0.7971276229790162
|
|
|
|
key: train_accuracy
|
|
value: [0.88436364 0.73309091 0.86690909 0.88145455 0.79418182 0.69672727
|
|
0.79636364 0.88872727 0.85319767 0.84084302]
|
|
|
|
mean value: 0.82358588794926
|
|
|
|
key: test_fscore
|
|
value: [0.79194631 0.63716814 0.81690141 0.89440994 0.6446281 0.59649123
|
|
0.83060109 0.90566038 0.82022472 0.78787879]
|
|
|
|
mean value: 0.7725910101134962
|
|
|
|
key: train_fscore
|
|
value: [0.88282977 0.64745437 0.85933897 0.88827964 0.75369887 0.57054583
|
|
0.82864137 0.89337979 0.86727989 0.82122449]
|
|
|
|
mean value: 0.8012673000071253
|
|
|
|
key: test_precision
|
|
value: [0.80821918 0.97297297 0.87878788 0.84705882 0.88636364 0.91891892
|
|
0.71698113 0.87804878 0.71568627 0.92857143]
|
|
|
|
mean value: 0.855160902429952
|
|
|
|
key: train_precision
|
|
value: [0.89536622 0.95467422 0.91190865 0.84046693 0.93722944 0.97535211
|
|
0.71488912 0.85695187 0.79136691 0.93668529]
|
|
|
|
mean value: 0.8814890751499358
|
|
|
|
key: test_recall
|
|
value: [0.77631579 0.47368421 0.76315789 0.94736842 0.50649351 0.44155844
|
|
0.98701299 0.93506494 0.96052632 0.68421053]
|
|
|
|
mean value: 0.7475393028024607
|
|
|
|
key: train_recall
|
|
value: [0.87063953 0.48982558 0.8125 0.94186047 0.63027656 0.40320233
|
|
0.98544396 0.93304221 0.95930233 0.73110465]
|
|
|
|
mean value: 0.7757197623641718
|
|
|
|
key: test_roc_auc
|
|
value: [0.7972488 0.7303486 0.8296309 0.88926863 0.72035202 0.70104238
|
|
0.79613807 0.90174299 0.78947368 0.81578947]
|
|
|
|
mean value: 0.7971035543403964
|
|
|
|
key: train_roc_auc
|
|
value: [0.88437362 0.73326796 0.86694869 0.88141058 0.7940627 0.69651396
|
|
0.79650105 0.88875948 0.85319767 0.84084302]
|
|
|
|
mean value: 0.8235878736332555
|
|
|
|
key: test_jcc
|
|
value: [0.65555556 0.46753247 0.69047619 0.80898876 0.47560976 0.425
|
|
0.71028037 0.82758621 0.6952381 0.65 ]
|
|
|
|
mean value: 0.6406267409673141
|
|
|
|
key: train_jcc
|
|
value: [0.79023747 0.47869318 0.75336927 0.79901356 0.6047486 0.39913545
|
|
0.70741902 0.80730479 0.76566125 0.6966759 ]
|
|
|
|
mean value: 0.6802258491448824
|
|
|
|
MCC on Blind test: 0.62
|
|
|
|
Accuracy on Blind test: 0.78
|
|
|
|
Model_name: Stochastic GDescent
|
|
Model func: SGDClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'mcsm_ppi2_affinity',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=169)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SGDClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.04885817 0.06108356 0.04959345 0.06227493 0.06916451 0.04803848
|
|
0.05732489 0.04631138 0.04698443 0.04601908]
|
|
|
|
mean value: 0.05356528759002686
|
|
|
|
key: score_time
|
|
value: [0.01142812 0.01302767 0.01295257 0.01305866 0.01149654 0.01306248
|
|
0.01305008 0.01297808 0.0129528 0.01302671]
|
|
|
|
mean value: 0.012703371047973634
|
|
|
|
key: test_mcc
|
|
value: [0.55263333 0.74806111 0.74096527 0.77842376 0.6898344 0.72556391
|
|
0.72161625 0.46417827 0.61847459 0.78792836]
|
|
|
|
mean value: 0.6827679261132394
|
|
|
|
key: train_mcc
|
|
value: [0.74820554 0.73030265 0.81671837 0.81240426 0.80182691 0.80340203
|
|
0.76378911 0.47279889 0.81519787 0.77274103]
|
|
|
|
mean value: 0.7537386655064744
|
|
|
|
key: test_accuracy
|
|
value: [0.77124183 0.86928105 0.86928105 0.88888889 0.84313725 0.8627451
|
|
0.85620915 0.68627451 0.80921053 0.88815789]
|
|
|
|
mean value: 0.8344427244582043
|
|
|
|
key: train_accuracy
|
|
value: [0.86909091 0.85527273 0.90763636 0.90618182 0.89818182 0.90109091
|
|
0.87854545 0.68436364 0.90697674 0.88226744]
|
|
|
|
mean value: 0.8689607822410148
|
|
|
|
key: test_fscore
|
|
value: [0.74452555 0.87804878 0.87341772 0.89032258 0.85185185 0.8627451
|
|
0.84507042 0.76 0.81045752 0.8969697 ]
|
|
|
|
mean value: 0.8413409215833054
|
|
|
|
key: train_fscore
|
|
value: [0.85759494 0.87035831 0.91037403 0.90672451 0.90371389 0.89820359
|
|
0.86983632 0.7594235 0.90433483 0.8902439 ]
|
|
|
|
mean value: 0.8770807824981575
|
|
|
|
key: test_precision
|
|
value: [0.83606557 0.81818182 0.84146341 0.87341772 0.81176471 0.86842105
|
|
0.92307692 0.61788618 0.80519481 0.83146067]
|
|
|
|
mean value: 0.8226932867910196
|
|
|
|
key: train_precision
|
|
value: [0.94097222 0.78866588 0.88477366 0.90215827 0.85658409 0.92449923
|
|
0.93624161 0.61324978 0.93076923 0.83375635]
|
|
|
|
mean value: 0.8611670324057497
|
|
|
|
key: test_recall
|
|
value: [0.67105263 0.94736842 0.90789474 0.90789474 0.8961039 0.85714286
|
|
0.77922078 0.98701299 0.81578947 0.97368421]
|
|
|
|
mean value: 0.8743164730006835
|
|
|
|
key: train_recall
|
|
value: [0.7877907 0.97093023 0.9375 0.91133721 0.95633188 0.87336245
|
|
0.81222707 0.99708879 0.87936047 0.95494186]
|
|
|
|
mean value: 0.9080870654344809
|
|
|
|
key: test_roc_auc
|
|
value: [0.77059125 0.86978811 0.86953178 0.8890123 0.84278879 0.86278195
|
|
0.85671565 0.68429597 0.80921053 0.88815789]
|
|
|
|
mean value: 0.8342874231032126
|
|
|
|
key: train_roc_auc
|
|
value: [0.86915008 0.85518855 0.90761463 0.90617807 0.89822408 0.90107076
|
|
0.87849726 0.68459091 0.90697674 0.88226744]
|
|
|
|
mean value: 0.868975851359128
|
|
|
|
key: test_jcc
|
|
value: [0.59302326 0.7826087 0.7752809 0.80232558 0.74193548 0.75862069
|
|
0.73170732 0.61290323 0.68131868 0.81318681]
|
|
|
|
mean value: 0.7292910642649137
|
|
|
|
key: train_jcc
|
|
value: [0.75069252 0.7704729 0.83549223 0.82936508 0.82434128 0.81521739
|
|
0.76965517 0.61215371 0.82537517 0.8021978 ]
|
|
|
|
mean value: 0.7834963248076051
|
|
|
|
MCC on Blind test: 0.61
|
|
|
|
Accuracy on Blind test: 0.84
|
|
|
|
Model_name: AdaBoost Classifier
|
|
Model func: AdaBoostClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'mcsm_ppi2_affinity',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=169)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', AdaBoostClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.4111495 0.41222382 0.40148973 0.39327574 0.38667727 0.38184023
|
|
0.3957026 0.39085674 0.39540768 0.39997125]
|
|
|
|
mean value: 0.39685945510864257
|
|
|
|
key: score_time
|
|
value: [0.01832533 0.0185864 0.01854658 0.01706672 0.0177567 0.01830125
|
|
0.01850367 0.01710153 0.01774406 0.01812792]
|
|
|
|
mean value: 0.018006014823913574
|
|
|
|
key: test_mcc
|
|
value: [0.80741391 0.88241328 0.87051021 0.90857826 0.89542037 0.82137062
|
|
0.88299739 0.86927546 0.89504682 0.82901914]
|
|
|
|
mean value: 0.8662045447560733
|
|
|
|
key: train_mcc
|
|
value: [0.91893262 0.91582498 0.92593581 0.92384624 0.91155299 0.91020525
|
|
0.90005851 0.92492924 0.92473515 0.92507941]
|
|
|
|
mean value: 0.9181100200852328
|
|
|
|
key: test_accuracy
|
|
value: [0.90196078 0.94117647 0.93464052 0.95424837 0.94771242 0.90849673
|
|
0.94117647 0.93464052 0.94736842 0.91447368]
|
|
|
|
mean value: 0.9325894392844857
|
|
|
|
key: train_accuracy
|
|
value: [0.95927273 0.95781818 0.96290909 0.96145455 0.95563636 0.95490909
|
|
0.94981818 0.96218182 0.9622093 0.9622093 ]
|
|
|
|
mean value: 0.9588418604651163
|
|
|
|
key: test_fscore
|
|
value: [0.90566038 0.94039735 0.93589744 0.95424837 0.94805195 0.91358025
|
|
0.94267516 0.93506494 0.94805195 0.91503268]
|
|
|
|
mean value: 0.9338660447319018
|
|
|
|
key: train_fscore
|
|
value: [0.95988539 0.95827338 0.96322999 0.9623312 0.95614666 0.95552367
|
|
0.95053763 0.96280401 0.96269727 0.96291013]
|
|
|
|
mean value: 0.9594339334557482
|
|
|
|
key: test_precision
|
|
value: [0.86746988 0.94666667 0.9125 0.94805195 0.94805195 0.87058824
|
|
0.925 0.93506494 0.93589744 0.90909091]
|
|
|
|
mean value: 0.9198381957636033
|
|
|
|
key: train_precision
|
|
value: [0.94632768 0.94871795 0.95565093 0.94158554 0.94460227 0.94200849
|
|
0.93644068 0.94655415 0.95042493 0.94537815]
|
|
|
|
mean value: 0.9457690764480635
|
|
|
|
key: test_recall
|
|
value: [0.94736842 0.93421053 0.96052632 0.96052632 0.94805195 0.96103896
|
|
0.96103896 0.93506494 0.96052632 0.92105263]
|
|
|
|
mean value: 0.9489405331510594
|
|
|
|
key: train_recall
|
|
value: [0.97383721 0.96802326 0.97093023 0.98401163 0.96797671 0.96943231
|
|
0.9650655 0.97962154 0.9752907 0.98110465]
|
|
|
|
mean value: 0.97352937442876
|
|
|
|
key: test_roc_auc
|
|
value: [0.90225564 0.94113124 0.93480861 0.95428913 0.94771018 0.90815106
|
|
0.9410458 0.93463773 0.94736842 0.91447368]
|
|
|
|
mean value: 0.9325871496924129
|
|
|
|
key: train_roc_auc
|
|
value: [0.95926213 0.95781075 0.96290325 0.96143813 0.95564533 0.95491965
|
|
0.94982926 0.96219449 0.9622093 0.9622093 ]
|
|
|
|
mean value: 0.9588421600487458
|
|
|
|
key: test_jcc
|
|
value: [0.82758621 0.8875 0.87951807 0.9125 0.90123457 0.84090909
|
|
0.89156627 0.87804878 0.90123457 0.84337349]
|
|
|
|
mean value: 0.8763471045421218
|
|
|
|
key: train_jcc
|
|
value: [0.92286501 0.9198895 0.92906815 0.92739726 0.91597796 0.91483516
|
|
0.9057377 0.92827586 0.92807746 0.92847318]
|
|
|
|
mean value: 0.9220597252763751
|
|
|
|
MCC on Blind test: 0.67
|
|
|
|
Accuracy on Blind test: 0.86
|
|
|
|
Model_name: Bagging Classifier
|
|
Model func: BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'mcsm_ppi2_affinity',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=169)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
BaggingClassifier(n_jobs=10, oob_score=True,
|
|
random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.23764133 0.26128435 0.26210427 0.16436124 0.26209378 0.26295519
|
|
0.25348902 0.25875854 0.26292038 0.26839256]
|
|
|
|
mean value: 0.24940006732940673
|
|
|
|
key: score_time
|
|
value: [0.02955842 0.02750564 0.0386343 0.03923464 0.03463292 0.04241014
|
|
0.03889441 0.03670263 0.04313731 0.03837514]
|
|
|
|
mean value: 0.03690855503082276
|
|
|
|
key: test_mcc
|
|
value: [0.85055077 0.8842875 0.90042249 0.92156528 0.94802543 0.90029235
|
|
0.91040218 0.91227016 0.89597867 0.88349301]
|
|
|
|
mean value: 0.9007287839876353
|
|
|
|
key: train_mcc
|
|
value: [0.99563741 0.99709091 0.99564583 0.99709512 0.99563742 0.99564585
|
|
0.99854651 0.99709513 0.99564059 0.992742 ]
|
|
|
|
mean value: 0.9960776765455992
|
|
|
|
key: test_accuracy
|
|
value: [0.92156863 0.94117647 0.94771242 0.96078431 0.97385621 0.94771242
|
|
0.95424837 0.95424837 0.94736842 0.94078947]
|
|
|
|
mean value: 0.9489465084279326
|
|
|
|
key: train_accuracy
|
|
value: [0.99781818 0.99854545 0.99781818 0.99854545 0.99781818 0.99781818
|
|
0.99927273 0.99854545 0.99781977 0.99636628]
|
|
|
|
mean value: 0.9980367864693446
|
|
|
|
key: test_fscore
|
|
value: [0.92592593 0.94267516 0.95 0.96052632 0.97435897 0.95061728
|
|
0.95597484 0.95652174 0.94871795 0.94267516]
|
|
|
|
mean value: 0.9507993349112008
|
|
|
|
key: train_fscore
|
|
value: [0.99782135 0.99854651 0.99782451 0.99854862 0.99781818 0.99782135
|
|
0.99927273 0.99854651 0.99782135 0.99635834]
|
|
|
|
mean value: 0.9980379455742069
|
|
|
|
key: test_precision
|
|
value: [0.87209302 0.91358025 0.9047619 0.96052632 0.96202532 0.90588235
|
|
0.92682927 0.91666667 0.925 0.91358025]
|
|
|
|
mean value: 0.9200945341990575
|
|
|
|
key: train_precision
|
|
value: [0.99709724 0.99854651 0.99565847 0.99710145 0.99709302 0.99565217
|
|
0.99854651 0.99709724 0.99709724 0.99854015]
|
|
|
|
mean value: 0.9972430008817535
|
|
|
|
key: test_recall
|
|
value: [0.98684211 0.97368421 1. 0.96052632 0.98701299 1.
|
|
0.98701299 1. 0.97368421 0.97368421]
|
|
|
|
mean value: 0.9842447026657553
|
|
|
|
key: train_recall
|
|
value: [0.99854651 0.99854651 1. 1. 0.9985444 1.
|
|
1. 1. 0.99854651 0.99418605]
|
|
|
|
mean value: 0.9988369977319658
|
|
|
|
key: test_roc_auc
|
|
value: [0.92199248 0.94138756 0.94805195 0.96078264 0.97376965 0.94736842
|
|
0.95403281 0.95394737 0.94736842 0.94078947]
|
|
|
|
mean value: 0.9489490772385509
|
|
|
|
key: train_roc_auc
|
|
value: [0.99781765 0.99854545 0.99781659 0.9985444 0.99781871 0.99781977
|
|
0.99927326 0.99854651 0.99781977 0.99636628]
|
|
|
|
mean value: 0.9980368386310551
|
|
|
|
key: test_jcc
|
|
value: [0.86206897 0.89156627 0.9047619 0.92405063 0.95 0.90588235
|
|
0.91566265 0.91666667 0.90243902 0.89156627]
|
|
|
|
mean value: 0.9064664727911517
|
|
|
|
key: train_jcc
|
|
value: [0.99565217 0.99709724 0.99565847 0.99710145 0.99564586 0.99565217
|
|
0.99854651 0.99709724 0.99565217 0.99274311]
|
|
|
|
mean value: 0.9960846402915284
|
|
|
|
MCC on Blind test: 0.62
|
|
|
|
Accuracy on Blind test: 0.84
|
|
|
|
Model_name: Gaussian Process
|
|
Model func: GaussianProcessClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'mcsm_ppi2_affinity',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=169)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianProcessClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.93949819 0.91885471 0.87848139 0.89286852 0.90375113 0.9048965
|
|
0.86058402 0.85154843 0.80388546 0.78980708]
|
|
|
|
mean value: 0.8744175434112549
|
|
|
|
key: score_time
|
|
value: [0.06611633 0.068856 0.07026267 0.06683969 0.06403923 0.0679698
|
|
0.06959128 0.03493404 0.04684305 0.06526351]
|
|
|
|
mean value: 0.06207156181335449
|
|
|
|
key: test_mcc
|
|
value: [0.70444953 0.88613163 0.79338963 0.75973749 0.80718653 0.75095187
|
|
0.84423266 0.80448212 0.74620251 0.79639781]
|
|
|
|
mean value: 0.7893161776714138
|
|
|
|
key: train_mcc
|
|
value: [0.95087046 0.95520033 0.94773605 0.9408013 0.93498314 0.95378235
|
|
0.9437143 0.94796228 0.9582143 0.94666202]
|
|
|
|
mean value: 0.9479926534081287
|
|
|
|
key: test_accuracy
|
|
value: [0.8496732 0.94117647 0.89542484 0.87581699 0.90196078 0.86928105
|
|
0.92156863 0.90196078 0.86842105 0.89473684]
|
|
|
|
mean value: 0.8920020639834881
|
|
|
|
key: train_accuracy
|
|
value: [0.97527273 0.97745455 0.97381818 0.97018182 0.96727273 0.97672727
|
|
0.97163636 0.97381818 0.97892442 0.97311047]
|
|
|
|
mean value: 0.9738216701902749
|
|
|
|
key: test_fscore
|
|
value: [0.85714286 0.94339623 0.89873418 0.88343558 0.9068323 0.88095238
|
|
0.92405063 0.9044586 0.87804878 0.90123457]
|
|
|
|
mean value: 0.8978286102710801
|
|
|
|
key: train_fscore
|
|
value: [0.97560976 0.97774587 0.97402597 0.9706514 0.96774194 0.97701149
|
|
0.97204301 0.97413793 0.97921147 0.97351467]
|
|
|
|
mean value: 0.9741693513550056
|
|
|
|
key: test_precision
|
|
value: [0.81176471 0.90361446 0.86585366 0.82758621 0.86904762 0.81318681
|
|
0.90123457 0.8875 0.81818182 0.84883721]
|
|
|
|
mean value: 0.8546807056766625
|
|
|
|
key: train_precision
|
|
value: [0.9631728 0.96595745 0.96704871 0.95627645 0.95338983 0.96453901
|
|
0.95762712 0.96170213 0.96605375 0.95909732]
|
|
|
|
mean value: 0.9614864559946508
|
|
|
|
key: test_recall
|
|
value: [0.90789474 0.98684211 0.93421053 0.94736842 0.94805195 0.96103896
|
|
0.94805195 0.92207792 0.94736842 0.96052632]
|
|
|
|
mean value: 0.9463431305536568
|
|
|
|
key: train_recall
|
|
value: [0.98837209 0.98982558 0.98110465 0.98546512 0.98253275 0.98981077
|
|
0.98689956 0.98689956 0.99273256 0.98837209]
|
|
|
|
mean value: 0.9872014742222673
|
|
|
|
key: test_roc_auc
|
|
value: [0.85005126 0.941473 0.89567669 0.87628161 0.90165755 0.86867738
|
|
0.9213944 0.90182843 0.86842105 0.89473684]
|
|
|
|
mean value: 0.8920198222829802
|
|
|
|
key: train_roc_auc
|
|
value: [0.97526319 0.97744554 0.97381288 0.97017069 0.96728382 0.97673678
|
|
0.97164746 0.97382769 0.97892442 0.97311047]
|
|
|
|
mean value: 0.9738222935919569
|
|
|
|
key: test_jcc
|
|
value: [0.75 0.89285714 0.81609195 0.79120879 0.82954545 0.78723404
|
|
0.85882353 0.8255814 0.7826087 0.82022472]
|
|
|
|
mean value: 0.8154175724701468
|
|
|
|
key: train_jcc
|
|
value: [0.95238095 0.95646067 0.94936709 0.94297636 0.9375 0.95505618
|
|
0.94560669 0.94957983 0.95926966 0.94839609]
|
|
|
|
mean value: 0.9496593535225601
|
|
|
|
MCC on Blind test: 0.5
|
|
|
|
Accuracy on Blind test: 0.79
|
|
|
|
Model_name: Gradient Boosting
|
|
Model func: GradientBoostingClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'mcsm_ppi2_affinity',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=169)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GradientBoostingClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.75407767 1.77305412 1.75941229 1.76050472 1.71600533 1.71455717
|
|
1.71563768 1.729002 1.77231836 1.75531816]
|
|
|
|
mean value: 1.744988751411438
|
|
|
|
key: score_time
|
|
value: [0.01084566 0.01031995 0.01009679 0.01003838 0.01001048 0.00993323
|
|
0.00989151 0.0102756 0.01095891 0.01059937]
|
|
|
|
mean value: 0.010296988487243652
|
|
|
|
key: test_mcc
|
|
value: [0.85055077 0.8842875 0.90042249 0.90857826 0.94771018 0.86507529
|
|
0.91040218 0.90916914 0.92233098 0.89753825]
|
|
|
|
mean value: 0.8996065062229006
|
|
|
|
key: train_mcc
|
|
value: [0.97835643 0.9669141 0.98121629 0.9769295 0.97253788 0.96982556
|
|
0.9684967 0.97111087 0.9726807 0.97119289]
|
|
|
|
mean value: 0.9729260927897064
|
|
|
|
key: test_accuracy
|
|
value: [0.92156863 0.94117647 0.94771242 0.95424837 0.97385621 0.92810458
|
|
0.95424837 0.95424837 0.96052632 0.94736842]
|
|
|
|
mean value: 0.9483058135534915
|
|
|
|
key: train_accuracy
|
|
value: [0.98909091 0.98327273 0.99054545 0.98836364 0.98618182 0.98472727
|
|
0.984 0.98545455 0.98619186 0.98546512]
|
|
|
|
mean value: 0.986329334038055
|
|
|
|
key: test_fscore
|
|
value: [0.92592593 0.94267516 0.95 0.95424837 0.97402597 0.93333333
|
|
0.95597484 0.95541401 0.96153846 0.94936709]
|
|
|
|
mean value: 0.950250316418618
|
|
|
|
key: train_fscore
|
|
value: [0.98920086 0.98351254 0.99062725 0.98848921 0.98630137 0.98492462
|
|
0.98424069 0.98559078 0.98636037 0.98563218]
|
|
|
|
mean value: 0.9864879886389764
|
|
|
|
key: test_precision
|
|
value: [0.87209302 0.91358025 0.9047619 0.94805195 0.97402597 0.875
|
|
0.92682927 0.9375 0.9375 0.91463415]
|
|
|
|
mean value: 0.9203976511643367
|
|
|
|
key: train_precision
|
|
value: [0.98002853 0.97029703 0.98283262 0.97863248 0.97714286 0.97167139
|
|
0.96897038 0.97574893 0.97446809 0.97443182]
|
|
|
|
mean value: 0.9754224116482623
|
|
|
|
key: test_recall
|
|
value: [0.98684211 0.97368421 1. 0.96052632 0.97402597 1.
|
|
0.98701299 0.97402597 0.98684211 0.98684211]
|
|
|
|
mean value: 0.9829801777170198
|
|
|
|
key: train_recall
|
|
value: [0.99854651 0.99709302 0.99854651 0.99854651 0.99563319 0.9985444
|
|
1. 0.99563319 0.99854651 0.99709302]
|
|
|
|
mean value: 0.9978182864493416
|
|
|
|
key: test_roc_auc
|
|
value: [0.92199248 0.94138756 0.94805195 0.95428913 0.97385509 0.92763158
|
|
0.95403281 0.95411825 0.96052632 0.94736842]
|
|
|
|
mean value: 0.9483253588516747
|
|
|
|
key: train_roc_auc
|
|
value: [0.98908403 0.98326267 0.99053963 0.98835623 0.98618869 0.98473731
|
|
0.98401163 0.98546194 0.98619186 0.98546512]
|
|
|
|
mean value: 0.9863299101249111
|
|
|
|
key: test_jcc
|
|
value: [0.86206897 0.89156627 0.9047619 0.9125 0.94936709 0.875
|
|
0.91566265 0.91463415 0.92592593 0.90361446]
|
|
|
|
mean value: 0.9055101404648106
|
|
|
|
key: train_jcc
|
|
value: [0.97863248 0.96755994 0.98142857 0.9772404 0.97297297 0.97029703
|
|
0.96897038 0.97159091 0.97308782 0.97167139]
|
|
|
|
mean value: 0.9733451891320364
|
|
|
|
MCC on Blind test: 0.67
|
|
|
|
Accuracy on Blind test: 0.86
|
|
|
|
Model_name: QDA
|
|
Model func: QuadraticDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'mcsm_ppi2_affinity',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=169)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', QuadraticDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.0451107 0.049371 0.04994035 0.05092955 0.06280994 0.06473494
|
|
0.06431675 0.05826092 0.05142689 0.05108881]
|
|
|
|
mean value: 0.05479898452758789
|
|
|
|
key: score_time
|
|
value: [0.01398492 0.01401734 0.01437211 0.01475191 0.02301121 0.01799297
|
|
0.01673698 0.01483917 0.01456618 0.0142796 ]
|
|
|
|
mean value: 0.015855240821838378
|
|
|
|
key: test_mcc
|
|
value: [0.19258404 0.23335786 0.26272017 0.22590614 0.28015007 0.25163936
|
|
0.26617701 0.20335531 0.11065667 0.26537245]
|
|
|
|
mean value: 0.2291919072128585
|
|
|
|
key: train_mcc
|
|
value: [0.25848695 0.24707391 0.24373835 0.24373835 0.24338408 0.2467148
|
|
0.25001123 0.25164703 0.25819889 0.24346738]
|
|
|
|
mean value: 0.24864609729796233
|
|
|
|
key: test_accuracy
|
|
value: [0.54901961 0.54901961 0.5620915 0.55555556 0.5751634 0.5620915
|
|
0.56862745 0.54248366 0.51973684 0.56578947]
|
|
|
|
mean value: 0.5549578603371172
|
|
|
|
key: train_accuracy
|
|
value: [0.56290909 0.55781818 0.55636364 0.55636364 0.55563636 0.55709091
|
|
0.55854545 0.55927273 0.5625 0.5559593 ]
|
|
|
|
mean value: 0.5582459302325582
|
|
|
|
key: test_fscore
|
|
value: [0.68202765 0.68778281 0.69406393 0.68807339 0.70319635 0.69683258
|
|
0.7 0.6875 0.67264574 0.69724771]
|
|
|
|
mean value: 0.6909370149185318
|
|
|
|
key: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
train_fscore
|
|
value: [0.69600405 0.69354839 0.69284995 0.69284995 0.69219144 0.69288956
|
|
0.6935891 0.69393939 0.69565217 0.69250126]
|
|
|
|
mean value: 0.6936015252412605
|
|
|
|
key: test_precision
|
|
value: [0.5248227 0.52413793 0.53146853 0.52816901 0.54225352 0.53472222
|
|
0.53846154 0.52380952 0.51020408 0.53521127]
|
|
|
|
mean value: 0.5293260326481314
|
|
|
|
key: train_precision
|
|
value: [0.53374709 0.5308642 0.53004622 0.53004622 0.52927581 0.53009259
|
|
0.5309119 0.53132251 0.53333333 0.52963818]
|
|
|
|
mean value: 0.5309278063184852
|
|
|
|
key: test_recall
|
|
value: [0.97368421 1. 1. 0.98684211 1. 1.
|
|
1. 1. 0.98684211 1. ]
|
|
|
|
mean value: 0.9947368421052631
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.55177717 0.55194805 0.56493506 0.55835612 0.57236842 0.55921053
|
|
0.56578947 0.53947368 0.51973684 0.56578947]
|
|
|
|
mean value: 0.5549384825700615
|
|
|
|
key: train_roc_auc
|
|
value: [0.56259098 0.55749636 0.55604076 0.55604076 0.5559593 0.55741279
|
|
0.55886628 0.55959302 0.5625 0.5559593 ]
|
|
|
|
mean value: 0.5582459547747198
|
|
|
|
key: test_jcc
|
|
value: [0.51748252 0.52413793 0.53146853 0.52447552 0.54225352 0.53472222
|
|
0.53846154 0.52380952 0.50675676 0.53521127]
|
|
|
|
mean value: 0.5278779334443492
|
|
|
|
key: train_jcc
|
|
value: [0.53374709 0.5308642 0.53004622 0.53004622 0.52927581 0.53009259
|
|
0.5309119 0.53132251 0.53333333 0.52963818]
|
|
|
|
mean value: 0.5309278063184852
|
|
|
|
MCC on Blind test: 0.06
|
|
|
|
Accuracy on Blind test: 0.34
|
|
|
|
Model_name: Ridge Classifier
|
|
Model func: RidgeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'mcsm_ppi2_affinity',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=169)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.03136492 0.02282596 0.02320504 0.02319527 0.04907656 0.03571248
|
|
0.04129052 0.02282357 0.02203798 0.02177525]
|
|
|
|
mean value: 0.029330754280090333
|
|
|
|
key: score_time
|
|
value: [0.02549243 0.01337242 0.01352215 0.01372337 0.0274179 0.02305341
|
|
0.02517033 0.01329613 0.01303434 0.01286769]
|
|
|
|
mean value: 0.018095016479492188
|
|
|
|
key: test_mcc
|
|
value: [0.63046972 0.75226544 0.73965143 0.79533886 0.71446184 0.783044
|
|
0.83176564 0.79185327 0.68109062 0.7642171 ]
|
|
|
|
mean value: 0.7484157932905766
|
|
|
|
key: train_mcc
|
|
value: [0.79006528 0.78955238 0.78604973 0.77926666 0.78911947 0.7907965
|
|
0.77790937 0.78788129 0.78404999 0.78621363]
|
|
|
|
mean value: 0.7860904312640132
|
|
|
|
key: test_accuracy
|
|
value: [0.81045752 0.87581699 0.86928105 0.89542484 0.85620915 0.88888889
|
|
0.91503268 0.89542484 0.83552632 0.88157895]
|
|
|
|
mean value: 0.8723641210870313
|
|
|
|
key: train_accuracy
|
|
value: [0.89381818 0.89381818 0.89236364 0.88872727 0.89381818 0.89454545
|
|
0.888 0.89309091 0.89098837 0.89244186]
|
|
|
|
mean value: 0.8921612050739958
|
|
|
|
key: test_fscore
|
|
value: [0.82424242 0.87741935 0.87179487 0.9 0.8625 0.89570552
|
|
0.91823899 0.89873418 0.84848485 0.88461538]
|
|
|
|
mean value: 0.8781735576374513
|
|
|
|
key: train_fscore
|
|
value: [0.8979021 0.89747191 0.89548023 0.89248067 0.89689266 0.89781536
|
|
0.89170183 0.89640592 0.89481066 0.89548023]
|
|
|
|
mean value: 0.8956441560180751
|
|
|
|
key: test_precision
|
|
value: [0.76404494 0.86075949 0.85 0.85714286 0.8313253 0.84883721
|
|
0.8902439 0.87654321 0.78651685 0.8625 ]
|
|
|
|
mean value: 0.8427913771389265
|
|
|
|
key: train_precision
|
|
value: [0.86522911 0.86820652 0.87087912 0.86394558 0.87105624 0.87021858
|
|
0.86258503 0.86885246 0.86449864 0.87087912]
|
|
|
|
mean value: 0.8676350410918827
|
|
|
|
key: test_recall
|
|
value: [0.89473684 0.89473684 0.89473684 0.94736842 0.8961039 0.94805195
|
|
0.94805195 0.92207792 0.92105263 0.90789474]
|
|
|
|
mean value: 0.9174812030075188
|
|
|
|
key: train_recall
|
|
value: [0.93313953 0.92877907 0.92151163 0.92296512 0.92430859 0.9272198
|
|
0.92285298 0.92576419 0.92732558 0.92151163]
|
|
|
|
mean value: 0.9255378118547104
|
|
|
|
key: test_roc_auc
|
|
value: [0.81100478 0.87593985 0.86944634 0.89576213 0.85594668 0.88849966
|
|
0.91481545 0.89524949 0.83552632 0.88157895]
|
|
|
|
mean value: 0.8723769651401231
|
|
|
|
key: train_roc_auc
|
|
value: [0.89378956 0.89379274 0.89234242 0.88870235 0.89384034 0.8945692
|
|
0.88802533 0.89311465 0.89098837 0.89244186]
|
|
|
|
mean value: 0.8921606834568904
|
|
|
|
key: test_jcc
|
|
value: [0.70103093 0.7816092 0.77272727 0.81818182 0.75824176 0.81111111
|
|
0.84883721 0.81609195 0.73684211 0.79310345]
|
|
|
|
mean value: 0.7837776800363645
|
|
|
|
key: train_jcc
|
|
value: [0.81472081 0.81401274 0.81074169 0.80583756 0.81306018 0.81457801
|
|
0.80456853 0.81226054 0.80964467 0.81074169]
|
|
|
|
mean value: 0.8110166409187562
|
|
|
|
MCC on Blind test: 0.62
|
|
|
|
Accuracy on Blind test: 0.83
|
|
|
|
Model_name: Ridge ClassifierCV
|
|
Model func: RidgeClassifierCV(cv=10)
|
|
List of models: /home/tanu/git/LSHTM_analysis/scripts/ml/./rpob_cd_sl.py:156: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
ros_CT.sort_values(by = ['test_mcc'], ascending = False, inplace = True)
|
|
/home/tanu/git/LSHTM_analysis/scripts/ml/./rpob_cd_sl.py:159: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
ros_BT.sort_values(by = ['bts_mcc'], ascending = False, inplace = True)
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'mcsm_ppi2_affinity',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=169)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifierCV(cv=10))])
|
|
|
|
key: fit_time
|
|
value: [0.37235236 0.50291133 0.38311815 0.48266912 0.35823059 0.46319032
|
|
0.41518831 0.50740194 0.40587974 0.41645098]
|
|
|
|
mean value: 0.43073928356170654
|
|
|
|
key: score_time
|
|
value: [0.01303744 0.03433681 0.01306701 0.02781892 0.02044153 0.01993084
|
|
0.01990438 0.0199182 0.02032948 0.01985645]
|
|
|
|
mean value: 0.020864105224609374
|
|
|
|
key: test_mcc
|
|
value: [0.63046972 0.75226544 0.73965143 0.79533886 0.71446184 0.75929586
|
|
0.83345743 0.79185327 0.68109062 0.78974719]
|
|
|
|
mean value: 0.7487631670218652
|
|
|
|
key: train_mcc
|
|
value: [0.79006528 0.78955238 0.78604973 0.77926666 0.78911947 0.80092379
|
|
0.79217899 0.78788129 0.78404999 0.79814553]
|
|
|
|
mean value: 0.7897233120399463
|
|
|
|
key: test_accuracy
|
|
value: [0.81045752 0.87581699 0.86928105 0.89542484 0.85620915 0.87581699
|
|
0.91503268 0.89542484 0.83552632 0.89473684]
|
|
|
|
mean value: 0.8723727210182318
|
|
|
|
key: train_accuracy
|
|
value: [0.89381818 0.89381818 0.89236364 0.88872727 0.89381818 0.89963636
|
|
0.89527273 0.89309091 0.89098837 0.89825581]
|
|
|
|
mean value: 0.8939789640591966
|
|
|
|
key: test_fscore
|
|
value: [0.82424242 0.87741935 0.87179487 0.9 0.8625 0.88484848
|
|
0.91925466 0.89873418 0.84848485 0.8961039 ]
|
|
|
|
mean value: 0.8783382715913518
|
|
|
|
key: train_fscore
|
|
value: [0.8979021 0.89747191 0.89548023 0.89248067 0.89689266 0.90267983
|
|
0.89844852 0.89640592 0.89481066 0.90140845]
|
|
|
|
mean value: 0.8973980943342379
|
|
|
|
key: test_precision
|
|
value: [0.76404494 0.86075949 0.85 0.85714286 0.8313253 0.82954545
|
|
0.88095238 0.87654321 0.78651685 0.88461538]
|
|
|
|
mean value: 0.8421445879761135
|
|
|
|
key: train_precision
|
|
value: [0.86522911 0.86820652 0.87087912 0.86394558 0.87105624 0.875513
|
|
0.87140903 0.86885246 0.86449864 0.87431694]
|
|
|
|
mean value: 0.8693906641305642
|
|
|
|
key: test_recall
|
|
value: [0.89473684 0.89473684 0.89473684 0.94736842 0.8961039 0.94805195
|
|
0.96103896 0.92207792 0.92105263 0.90789474]
|
|
|
|
mean value: 0.9187799043062201
|
|
|
|
key: train_recall
|
|
value: [0.93313953 0.92877907 0.92151163 0.92296512 0.92430859 0.93158661
|
|
0.9272198 0.92576419 0.92732558 0.93023256]
|
|
|
|
mean value: 0.9272832673233811
|
|
|
|
key: test_roc_auc
|
|
value: [0.81100478 0.87593985 0.86944634 0.89576213 0.85594668 0.87534176
|
|
0.91473001 0.89524949 0.83552632 0.89473684]
|
|
|
|
mean value: 0.8723684210526316
|
|
|
|
key: train_roc_auc
|
|
value: [0.89378956 0.89379274 0.89234242 0.88870235 0.89384034 0.89965958
|
|
0.89529594 0.89311465 0.89098837 0.89825581]
|
|
|
|
mean value: 0.8939781786330862
|
|
|
|
key: test_jcc
|
|
value: [0.70103093 0.7816092 0.77272727 0.81818182 0.75824176 0.79347826
|
|
0.85057471 0.81609195 0.73684211 0.81176471]
|
|
|
|
mean value: 0.7840542711069942
|
|
|
|
key: train_jcc
|
|
value: [0.81472081 0.81401274 0.81074169 0.80583756 0.81306018 0.82262211
|
|
0.815621 0.81226054 0.80964467 0.82051282]
|
|
|
|
mean value: 0.8139034115375714
|
|
|
|
MCC on Blind test: 0.62
|
|
|
|
Accuracy on Blind test: 0.83
|
|
|
|
Model_name: Logistic Regression
|
|
Model func: LogisticRegression(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'mcsm_ppi2_affinity',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=169)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegression(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.03561974 0.03608418 0.03647733 0.06793594 0.0644722 0.05805612
|
|
0.04845262 0.0465107 0.03675675 0.04629493]
|
|
|
|
mean value: 0.04766604900360107
|
|
|
|
key: score_time
|
|
value: [0.01293945 0.01210594 0.01297355 0.01302552 0.01463747 0.01526356
|
|
0.01309085 0.01314807 0.02174902 0.01311755]
|
|
|
|
mean value: 0.014205098152160645
|
|
|
|
key: test_mcc
|
|
value: [0.71921182 0.68850906 0.71921182 0.6317806 0.64450339 0.60753044
|
|
0.79385662 0.54446551 0.67900461 0.64285714]
|
|
|
|
mean value: 0.6670931004431977
|
|
|
|
key: train_mcc
|
|
value: [0.76147482 0.76445385 0.7453 0.76775773 0.74917035 0.78896845
|
|
0.76570464 0.78107876 0.76083968 0.78195539]
|
|
|
|
mean value: 0.7666703668771755
|
|
|
|
key: test_accuracy
|
|
value: [0.85964912 0.84210526 0.85964912 0.80701754 0.82142857 0.80357143
|
|
0.89285714 0.76785714 0.83928571 0.82142857]
|
|
|
|
mean value: 0.831484962406015
|
|
|
|
key: train_accuracy
|
|
value: [0.87968442 0.8816568 0.87179487 0.88362919 0.87401575 0.89370079
|
|
0.88188976 0.88976378 0.87992126 0.88976378]
|
|
|
|
mean value: 0.8825820404106292
|
|
|
|
key: test_fscore
|
|
value: [0.85714286 0.84745763 0.86206897 0.83076923 0.82758621 0.80701754
|
|
0.9 0.74509804 0.84210526 0.82142857]
|
|
|
|
mean value: 0.8340674305106327
|
|
|
|
key: train_fscore
|
|
value: [0.88425047 0.88505747 0.87571702 0.88543689 0.87739464 0.89694656
|
|
0.88593156 0.89312977 0.88291747 0.89393939]
|
|
|
|
mean value: 0.8860721247238658
|
|
|
|
key: test_precision
|
|
value: [0.85714286 0.80645161 0.86206897 0.75 0.8 0.79310345
|
|
0.84375 0.82608696 0.82758621 0.82142857]
|
|
|
|
mean value: 0.8187618618686049
|
|
|
|
key: train_precision
|
|
value: [0.85347985 0.8619403 0.84814815 0.87022901 0.85447761 0.87037037
|
|
0.85661765 0.86666667 0.86142322 0.86131387]
|
|
|
|
mean value: 0.8604666693392132
|
|
|
|
key: test_recall
|
|
value: [0.85714286 0.89285714 0.86206897 0.93103448 0.85714286 0.82142857
|
|
0.96428571 0.67857143 0.85714286 0.82142857]
|
|
|
|
mean value: 0.8543103448275862
|
|
|
|
key: train_recall
|
|
value: [0.91732283 0.90944882 0.90513834 0.90118577 0.9015748 0.92519685
|
|
0.91732283 0.92125984 0.90551181 0.92913386]
|
|
|
|
mean value: 0.9133095764215244
|
|
|
|
key: test_roc_auc
|
|
value: [0.85960591 0.8429803 0.85960591 0.80480296 0.82142857 0.80357143
|
|
0.89285714 0.76785714 0.83928571 0.82142857]
|
|
|
|
mean value: 0.8313423645320197
|
|
|
|
key: train_roc_auc
|
|
value: [0.87961003 0.88160188 0.87186051 0.88366375 0.87401575 0.89370079
|
|
0.88188976 0.88976378 0.87992126 0.88976378]
|
|
|
|
mean value: 0.882579129189879
|
|
|
|
key: test_jcc
|
|
value: [0.75 0.73529412 0.75757576 0.71052632 0.70588235 0.67647059
|
|
0.81818182 0.59375 0.72727273 0.6969697 ]
|
|
|
|
mean value: 0.7171923374613003
|
|
|
|
key: train_jcc
|
|
value: [0.79251701 0.79381443 0.77891156 0.79442509 0.78156997 0.81314879
|
|
0.79522184 0.80689655 0.79037801 0.80821918]
|
|
|
|
mean value: 0.7955102426006514
|
|
|
|
MCC on Blind test: 0.57
|
|
|
|
Accuracy on Blind test: 0.81
|
|
|
|
Model_name: Logistic RegressionCV
|
|
Model func: LogisticRegressionCV(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'mcsm_ppi2_affinity',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=169)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegressionCV(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.81602025 0.92681646 0.8532722 0.81648064 0.96134233 0.81097412
|
|
0.81866097 0.9349134 0.84393263 0.92439151]
|
|
|
|
mean value: 0.8706804513931274
|
|
|
|
key: score_time
|
|
value: [0.01221561 0.01229739 0.01223397 0.01220155 0.01228786 0.01219797
|
|
0.01218033 0.01216388 0.01215792 0.01236916]
|
|
|
|
mean value: 0.012230563163757324
|
|
|
|
key: test_mcc
|
|
value: [0.79110556 0.61805122 0.68434084 0.6317806 0.68250015 0.60753044
|
|
0.82195294 0.5118907 0.67900461 0.67900461]
|
|
|
|
mean value: 0.6707161661003566
|
|
|
|
key: train_mcc
|
|
value: [0.70052638 0.7323036 0.72879588 0.72424268 0.72521925 0.74917035
|
|
0.68512433 0.74128435 0.74128435 0.72521925]
|
|
|
|
mean value: 0.7253170414217583
|
|
|
|
key: test_accuracy
|
|
value: [0.89473684 0.80701754 0.84210526 0.80701754 0.83928571 0.80357143
|
|
0.91071429 0.75 0.83928571 0.83928571]
|
|
|
|
mean value: 0.8333020050125313
|
|
|
|
key: train_accuracy
|
|
value: [0.85009862 0.86587771 0.86390533 0.86193294 0.86220472 0.87401575
|
|
0.84251969 0.87007874 0.87007874 0.86220472]
|
|
|
|
mean value: 0.8622916957865474
|
|
|
|
key: test_fscore
|
|
value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[0.88888889 0.81355932 0.84745763 0.83076923 0.84745763 0.80701754
|
|
0.9122807 0.72 0.84210526 0.83636364]
|
|
|
|
mean value: 0.8345899841064872
|
|
|
|
key: train_fscore
|
|
value: [0.85271318 0.86872587 0.86705202 0.86381323 0.86538462 0.87739464
|
|
0.84375 0.87356322 0.87356322 0.86538462]
|
|
|
|
mean value: 0.8651344603279979
|
|
|
|
key: test_precision
|
|
value: [0.92307692 0.77419355 0.83333333 0.75 0.80645161 0.79310345
|
|
0.89655172 0.81818182 0.82758621 0.85185185]
|
|
|
|
mean value: 0.8274330467044594
|
|
|
|
key: train_precision
|
|
value: [0.83969466 0.85227273 0.84586466 0.85057471 0.84586466 0.85447761
|
|
0.8372093 0.85074627 0.85074627 0.84586466]
|
|
|
|
mean value: 0.8473315532946674
|
|
|
|
key: test_recall
|
|
value: [0.85714286 0.85714286 0.86206897 0.93103448 0.89285714 0.82142857
|
|
0.92857143 0.64285714 0.85714286 0.82142857]
|
|
|
|
mean value: 0.847167487684729
|
|
|
|
key: train_recall
|
|
value: [0.86614173 0.88582677 0.88932806 0.87747036 0.88582677 0.9015748
|
|
0.8503937 0.8976378 0.8976378 0.88582677]
|
|
|
|
mean value: 0.8837664560704616
|
|
|
|
key: test_roc_auc
|
|
value: [0.89408867 0.80788177 0.84174877 0.80480296 0.83928571 0.80357143
|
|
0.91071429 0.75 0.83928571 0.83928571]
|
|
|
|
mean value: 0.8330665024630541
|
|
|
|
key: train_roc_auc
|
|
value: [0.85006691 0.86583829 0.86395537 0.86196352 0.86220472 0.87401575
|
|
0.84251969 0.87007874 0.87007874 0.86220472]
|
|
|
|
mean value: 0.8622926457315365
|
|
|
|
key: test_jcc
|
|
value: [0.8 0.68571429 0.73529412 0.71052632 0.73529412 0.67647059
|
|
0.83870968 0.5625 0.72727273 0.71875 ]
|
|
|
|
mean value: 0.7190531829725253
|
|
|
|
key: train_jcc
|
|
value: [0.74324324 0.76791809 0.76530612 0.76027397 0.76271186 0.78156997
|
|
0.72972973 0.7755102 0.7755102 0.76271186]
|
|
|
|
mean value: 0.7624485259609025
|
|
|
|
MCC on Blind test: 0.57
|
|
|
|
Accuracy on Blind test: 0.81
|
|
|
|
Model_name: Gaussian NB
|
|
Model func: GaussianNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'mcsm_ppi2_affinity',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=169)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01503515 0.01330733 0.01064587 0.01132655 0.01142097 0.01167536
|
|
0.01149845 0.01156235 0.01134682 0.01025176]
|
|
|
|
mean value: 0.011807060241699219
|
|
|
|
key: score_time
|
|
value: [0.01238966 0.00971293 0.00975966 0.00989866 0.00983214 0.00979614
|
|
0.00988173 0.00982308 0.00948858 0.00915813]
|
|
|
|
mean value: 0.009974074363708497
|
|
|
|
key: test_mcc
|
|
value: [0.48627961 0.43881637 0.48258116 0.47413793 0.68250015 0.4330127
|
|
0.42857143 0.2981424 0.39310793 0.4272046 ]
|
|
|
|
mean value: 0.4544354273405089
|
|
|
|
key: train_mcc
|
|
value: [0.50437297 0.51016518 0.504271 0.50252706 0.52922073 0.51097092
|
|
0.49452718 0.49686572 0.48947301 0.50907612]
|
|
|
|
mean value: 0.5051469898368793
|
|
|
|
key: test_accuracy
|
|
value: [0.73684211 0.71929825 0.73684211 0.73684211 0.83928571 0.71428571
|
|
0.71428571 0.64285714 0.69642857 0.69642857]
|
|
|
|
mean value: 0.7233395989974938
|
|
|
|
key: train_accuracy
|
|
value: [0.74753452 0.75147929 0.7495069 0.74753452 0.75984252 0.7519685
|
|
0.74606299 0.74606299 0.74212598 0.7519685 ]
|
|
|
|
mean value: 0.749408672288745
|
|
|
|
key: test_fscore
|
|
value: [0.69387755 0.7037037 0.71698113 0.73684211 0.83018868 0.69230769
|
|
0.71428571 0.58333333 0.69090909 0.62222222]
|
|
|
|
mean value: 0.6984651224366077
|
|
|
|
key: train_fscore
|
|
value: [0.72173913 0.72961373 0.72921109 0.72294372 0.73478261 0.72961373
|
|
0.73291925 0.72727273 0.72186837 0.73305085]
|
|
|
|
mean value: 0.7283015211874566
|
|
|
|
key: test_precision
|
|
value: [0.80952381 0.73076923 0.79166667 0.75 0.88 0.75
|
|
0.71428571 0.7 0.7037037 0.82352941]
|
|
|
|
mean value: 0.765347853671383
|
|
|
|
key: train_precision
|
|
value: [0.80582524 0.80188679 0.79166667 0.79904306 0.82038835 0.80188679
|
|
0.77292576 0.78538813 0.78341014 0.79357798]
|
|
|
|
mean value: 0.7955998917952539
|
|
|
|
key: test_recall
|
|
value: [0.60714286 0.67857143 0.65517241 0.72413793 0.78571429 0.64285714
|
|
0.71428571 0.5 0.67857143 0.5 ]
|
|
|
|
mean value: 0.6486453201970444
|
|
|
|
key: train_recall
|
|
value: [0.65354331 0.66929134 0.67588933 0.66007905 0.66535433 0.66929134
|
|
0.69685039 0.67716535 0.66929134 0.68110236]
|
|
|
|
mean value: 0.6717858143226167
|
|
|
|
key: test_roc_auc
|
|
value: [0.73460591 0.71859606 0.73830049 0.73706897 0.83928571 0.71428571
|
|
0.71428571 0.64285714 0.69642857 0.69642857]
|
|
|
|
mean value: 0.7232142857142857
|
|
|
|
key: train_roc_auc
|
|
value: [0.74772027 0.75164172 0.74936199 0.74736236 0.75984252 0.7519685
|
|
0.74606299 0.74606299 0.74212598 0.7519685 ]
|
|
|
|
mean value: 0.7494117830132894
|
|
|
|
key: test_jcc
|
|
value: [0.53125 0.54285714 0.55882353 0.58333333 0.70967742 0.52941176
|
|
0.55555556 0.41176471 0.52777778 0.4516129 ]
|
|
|
|
mean value: 0.5402064132104455
|
|
|
|
key: train_jcc
|
|
value: [0.56462585 0.57432432 0.5738255 0.56610169 0.58075601 0.57432432
|
|
0.57843137 0.57142857 0.56478405 0.57859532]
|
|
|
|
mean value: 0.5727197025864937
|
|
|
|
MCC on Blind test: 0.37
|
|
|
|
Accuracy on Blind test: 0.73
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'mcsm_ppi2_affinity',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=169)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01061177 0.01070476 0.01078558 0.01167393 0.01194334 0.01158547
|
|
0.01189947 0.01185536 0.01188397 0.01188469]
|
|
|
|
mean value: 0.011482834815979004
|
|
|
|
key: score_time
|
|
value: [0.00923443 0.00920081 0.0092988 0.0098989 0.01000261 0.00946021
|
|
0.00997901 0.00997376 0.01002645 0.01011968]
|
|
|
|
mean value: 0.009719467163085938
|
|
|
|
key: test_mcc
|
|
value: [0.61453202 0.36991397 0.60497779 0.40394089 0.67900461 0.46697379
|
|
0.4645821 0.52174919 0.53605627 0.47187011]
|
|
|
|
mean value: 0.5133600718816802
|
|
|
|
key: train_mcc
|
|
value: [0.578123 0.60371585 0.56022247 0.59190576 0.5894764 0.58529579
|
|
0.57214905 0.59239045 0.5657391 0.59203967]
|
|
|
|
mean value: 0.5831057553248109
|
|
|
|
key: test_accuracy
|
|
value: [0.80701754 0.68421053 0.78947368 0.70175439 0.83928571 0.73214286
|
|
0.73214286 0.75 0.76785714 0.73214286]
|
|
|
|
mean value: 0.7536027568922306
|
|
|
|
key: train_accuracy
|
|
value: [0.78698225 0.80078895 0.7790927 0.79487179 0.79330709 0.79133858
|
|
0.78543307 0.79527559 0.78149606 0.79527559]
|
|
|
|
mean value: 0.7903861684449207
|
|
|
|
key: test_fscore
|
|
value: [0.80701754 0.68965517 0.76 0.70175439 0.83636364 0.74576271
|
|
0.73684211 0.70833333 0.76363636 0.70588235]
|
|
|
|
mean value: 0.7455247605640429
|
|
|
|
key: train_fscore
|
|
value: [0.77405858 0.7926078 0.76859504 0.78512397 0.7826087 0.78099174
|
|
0.77800407 0.78688525 0.77018634 0.7877551 ]
|
|
|
|
mean value: 0.7806816576400366
|
|
|
|
key: test_precision
|
|
value: [0.79310345 0.66666667 0.9047619 0.71428571 0.85185185 0.70967742
|
|
0.72413793 0.85 0.77777778 0.7826087 ]
|
|
|
|
mean value: 0.7774871409661273
|
|
|
|
key: train_precision
|
|
value: [0.82589286 0.82832618 0.80519481 0.82251082 0.82532751 0.82173913
|
|
0.80590717 0.82051282 0.81222707 0.81779661]
|
|
|
|
mean value: 0.8185434984371709
|
|
|
|
key: test_recall
|
|
value: [0.82142857 0.71428571 0.65517241 0.68965517 0.82142857 0.78571429
|
|
0.75 0.60714286 0.75 0.64285714]
|
|
|
|
mean value: 0.7237684729064039
|
|
|
|
key: train_recall
|
|
value: [0.72834646 0.75984252 0.73517787 0.75098814 0.74409449 0.74409449
|
|
0.7519685 0.75590551 0.73228346 0.75984252]
|
|
|
|
mean value: 0.7462543960661043
|
|
|
|
key: test_roc_auc
|
|
value: [0.80726601 0.68472906 0.79187192 0.70197044 0.83928571 0.73214286
|
|
0.73214286 0.75 0.76785714 0.73214286]
|
|
|
|
mean value: 0.7539408866995074
|
|
|
|
key: train_roc_auc
|
|
value: [0.78709813 0.80086988 0.77900626 0.79478541 0.79330709 0.79133858
|
|
0.78543307 0.79527559 0.78149606 0.79527559]
|
|
|
|
mean value: 0.7903885655597398
|
|
|
|
key: test_jcc
|
|
value: [0.67647059 0.52631579 0.61290323 0.54054054 0.71875 0.59459459
|
|
0.58333333 0.5483871 0.61764706 0.54545455]
|
|
|
|
mean value: 0.5964396773036167
|
|
|
|
key: train_jcc
|
|
value: [0.63139932 0.65646259 0.62416107 0.6462585 0.64285714 0.64067797
|
|
0.63666667 0.64864865 0.62626263 0.64983165]
|
|
|
|
mean value: 0.6403226180035451
|
|
|
|
MCC on Blind test: 0.47
|
|
|
|
Accuracy on Blind test: 0.79
|
|
|
|
Model_name: K-Nearest Neighbors
|
|
Model func: KNeighborsClassifier()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'mcsm_ppi2_affinity',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=169)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', KNeighborsClassifier())])
|
|
|
|
key: fit_time
|
|
value: [0.01129556 0.01080298 0.0108273 0.01123166 0.01067805 0.00976348
|
|
0.01066089 0.00996542 0.01069593 0.00960088]
|
|
|
|
mean value: 0.010552215576171874
|
|
|
|
key: score_time
|
|
value: [0.01497769 0.01742935 0.01769257 0.01882577 0.01734138 0.01660824
|
|
0.01526499 0.01191354 0.01336932 0.01412559]
|
|
|
|
mean value: 0.015754842758178712
|
|
|
|
key: test_mcc
|
|
value: [0.4464279 0.01606933 0.54433498 0.50927421 0.60753044 0.47187011
|
|
0.4645821 0.29250897 0.39310793 0.28867513]
|
|
|
|
mean value: 0.40343810871351615
|
|
|
|
key: train_mcc
|
|
value: [0.63327212 0.65305775 0.61746075 0.63350842 0.63009708 0.66174563
|
|
0.65356357 0.64598978 0.64665231 0.66540074]
|
|
|
|
mean value: 0.6440748155069376
|
|
|
|
key: test_accuracy
|
|
value: [0.71929825 0.50877193 0.77192982 0.75438596 0.80357143 0.73214286
|
|
0.73214286 0.64285714 0.69642857 0.64285714]
|
|
|
|
mean value: 0.7004385964912281
|
|
|
|
key: train_accuracy
|
|
value: [0.81656805 0.82642998 0.8086785 0.81656805 0.81496063 0.83070866
|
|
0.82677165 0.82283465 0.82283465 0.83267717]
|
|
|
|
mean value: 0.8219031977511686
|
|
|
|
key: test_fscore
|
|
value: [0.68 0.48148148 0.77192982 0.76666667 0.80701754 0.75409836
|
|
0.72727273 0.6 0.70175439 0.61538462]
|
|
|
|
mean value: 0.6905605605847194
|
|
|
|
key: train_fscore
|
|
value: [0.81510934 0.8247012 0.80638723 0.81287726 0.812749 0.828
|
|
0.82608696 0.82 0.81781377 0.83365949]
|
|
|
|
mean value: 0.8197384245167624
|
|
|
|
key: test_precision
|
|
value: [0.77272727 0.5 0.78571429 0.74193548 0.79310345 0.6969697
|
|
0.74074074 0.68181818 0.68965517 0.66666667]
|
|
|
|
mean value: 0.7069330949197468
|
|
|
|
key: train_precision
|
|
value: [0.82329317 0.83467742 0.81451613 0.82786885 0.82258065 0.84146341
|
|
0.82936508 0.83333333 0.84166667 0.82879377]
|
|
|
|
mean value: 0.8297558487016459
|
|
|
|
key: test_recall
|
|
value: [0.60714286 0.46428571 0.75862069 0.79310345 0.82142857 0.82142857
|
|
0.71428571 0.53571429 0.71428571 0.57142857]
|
|
|
|
mean value: 0.6801724137931034
|
|
|
|
key: train_recall
|
|
value: [0.80708661 0.81496063 0.79841897 0.79841897 0.80314961 0.81496063
|
|
0.82283465 0.80708661 0.79527559 0.83858268]
|
|
|
|
mean value: 0.8100774952538048
|
|
|
|
key: test_roc_auc
|
|
value: [0.71736453 0.50800493 0.77216749 0.75369458 0.80357143 0.73214286
|
|
0.73214286 0.64285714 0.69642857 0.64285714]
|
|
|
|
mean value: 0.7001231527093595
|
|
|
|
key: train_roc_auc
|
|
value: [0.81658679 0.82645265 0.80865831 0.81653232 0.81496063 0.83070866
|
|
0.82677165 0.82283465 0.82283465 0.83267717]
|
|
|
|
mean value: 0.821901745977405
|
|
|
|
key: test_jcc
|
|
value: [0.51515152 0.31707317 0.62857143 0.62162162 0.67647059 0.60526316
|
|
0.57142857 0.42857143 0.54054054 0.44444444]
|
|
|
|
mean value: 0.5349136467191289
|
|
|
|
key: train_jcc
|
|
value: [0.68791946 0.70169492 0.67558528 0.68474576 0.68456376 0.70648464
|
|
0.7037037 0.69491525 0.69178082 0.7147651 ]
|
|
|
|
mean value: 0.6946158705891714
|
|
|
|
MCC on Blind test: 0.37
|
|
|
|
Accuracy on Blind test: 0.72
|
|
|
|
Model_name: SVM
|
|
Model func: SVC(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'mcsm_ppi2_affinity',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=169)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SVC(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02567387 0.02694869 0.02712679 0.02606583 0.02421474 0.02560735
|
|
0.02624297 0.02720189 0.02753401 0.02670598]
|
|
|
|
mean value: 0.0263322114944458
|
|
|
|
key: score_time
|
|
value: [0.01262069 0.0141747 0.01405168 0.01418781 0.01292872 0.01343942
|
|
0.01274204 0.01401901 0.01414609 0.01381564]
|
|
|
|
mean value: 0.013612580299377442
|
|
|
|
key: test_mcc
|
|
value: [0.79161589 0.58562417 0.72064772 0.6746955 0.64951905 0.64450339
|
|
0.78772636 0.57735027 0.64450339 0.64285714]
|
|
|
|
mean value: 0.671904288327684
|
|
|
|
key: train_mcc
|
|
value: [0.70279728 0.72655975 0.73510514 0.73036363 0.71184623 0.73557444
|
|
0.70343094 0.74348384 0.74406591 0.73615032]
|
|
|
|
mean value: 0.7269377497972854
|
|
|
|
key: test_accuracy
|
|
value: [0.89473684 0.78947368 0.85964912 0.8245614 0.82142857 0.82142857
|
|
0.89285714 0.78571429 0.82142857 0.82142857]
|
|
|
|
mean value: 0.8332706766917293
|
|
|
|
key: train_accuracy
|
|
value: [0.85009862 0.86193294 0.86587771 0.86390533 0.85433071 0.86614173
|
|
0.8503937 0.87007874 0.87007874 0.86614173]
|
|
|
|
mean value: 0.8618979949991459
|
|
|
|
key: test_fscore
|
|
value: [0.89655172 0.8 0.86666667 0.84848485 0.83333333 0.82758621
|
|
0.89655172 0.76923077 0.82758621 0.82142857]
|
|
|
|
mean value: 0.8387420051213155
|
|
|
|
key: train_fscore
|
|
value: [0.85660377 0.86792453 0.87169811 0.86907021 0.86090226 0.87218045
|
|
0.85660377 0.87593985 0.87640449 0.87265918]
|
|
|
|
mean value: 0.867998662421086
|
|
|
|
key: test_precision
|
|
value: [0.86666667 0.75 0.83870968 0.75675676 0.78125 0.8
|
|
0.86666667 0.83333333 0.8 0.82142857]
|
|
|
|
mean value: 0.811481167227135
|
|
|
|
key: train_precision
|
|
value: [0.82246377 0.83333333 0.83393502 0.83576642 0.82374101 0.83453237
|
|
0.82246377 0.8381295 0.83571429 0.83214286]
|
|
|
|
mean value: 0.8312222331528408
|
|
|
|
key: test_recall
|
|
value: [0.92857143 0.85714286 0.89655172 0.96551724 0.89285714 0.85714286
|
|
0.92857143 0.71428571 0.85714286 0.82142857]
|
|
|
|
mean value: 0.8719211822660099
|
|
|
|
key: train_recall
|
|
value: [0.89370079 0.90551181 0.91304348 0.90513834 0.9015748 0.91338583
|
|
0.89370079 0.91732283 0.92125984 0.91732283]
|
|
|
|
mean value: 0.9081961345740873
|
|
|
|
key: test_roc_auc
|
|
value: [0.8953202 0.79064039 0.85899015 0.82204433 0.82142857 0.82142857
|
|
0.89285714 0.78571429 0.82142857 0.82142857]
|
|
|
|
mean value: 0.833128078817734
|
|
|
|
key: train_roc_auc
|
|
value: [0.85001245 0.86184681 0.86597056 0.86398649 0.85433071 0.86614173
|
|
0.8503937 0.87007874 0.87007874 0.86614173]
|
|
|
|
mean value: 0.8618981668793377
|
|
|
|
key: test_jcc
|
|
value: [0.8125 0.66666667 0.76470588 0.73684211 0.71428571 0.70588235
|
|
0.8125 0.625 0.70588235 0.6969697 ]
|
|
|
|
mean value: 0.724123477142053
|
|
|
|
key: train_jcc
|
|
value: [0.74917492 0.76666667 0.77257525 0.76845638 0.75577558 0.77333333
|
|
0.74917492 0.77926421 0.78 0.77408638]
|
|
|
|
mean value: 0.7668507632000665
|
|
|
|
MCC on Blind test: 0.61
|
|
|
|
Accuracy on Blind test: 0.83
|
|
|
|
Model_name: MLP
|
|
Model func: MLPClassifier(max_iter=500, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'mcsm_ppi2_affinity',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=169)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MLPClassifier(max_iter=500, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [2.10268664 2.27858949 2.08344865 2.25054455 2.20438886 2.15867019
|
|
2.02390742 2.04518032 2.05392957 1.97005749]
|
|
|
|
mean value: 2.1171403169631957
|
|
|
|
key: score_time
|
|
value: [0.01491737 0.0149045 0.01508427 0.01546502 0.01367974 0.02501822
|
|
0.01245546 0.01504374 0.01442242 0.01513696]
|
|
|
|
mean value: 0.01561276912689209
|
|
|
|
key: test_mcc
|
|
value: [0.68736396 0.54433498 0.64901478 0.69397486 0.75434227 0.42857143
|
|
0.64450339 0.68250015 0.65814518 0.64951905]
|
|
|
|
mean value: 0.6392270041971599
|
|
|
|
key: train_mcc
|
|
value: [0.96844169 0.95269145 0.96450468 0.94872473 0.97649905 0.96110675
|
|
0.9606597 0.97250878 0.96862405 0.95670033]
|
|
|
|
mean value: 0.9630461220530966
|
|
|
|
key: test_accuracy
|
|
value: [0.84210526 0.77192982 0.8245614 0.84210526 0.875 0.71428571
|
|
0.82142857 0.83928571 0.82142857 0.82142857]
|
|
|
|
mean value: 0.8173558897243107
|
|
|
|
key: train_accuracy
|
|
value: [0.98422091 0.97633136 0.98224852 0.97435897 0.98818898 0.98031496
|
|
0.98031496 0.98622047 0.98425197 0.97834646]
|
|
|
|
mean value: 0.98147975585892
|
|
|
|
key: test_fscore
|
|
value: [0.83018868 0.77192982 0.82758621 0.85714286 0.88135593 0.71428571
|
|
0.82758621 0.83018868 0.83870968 0.80769231]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
|
|
mean value: 0.8186666085588696
|
|
|
|
key: train_fscore
|
|
value: [0.98425197 0.97647059 0.98224852 0.97425743 0.98809524 0.98
|
|
0.98039216 0.98613861 0.98412698 0.97830375]
|
|
|
|
mean value: 0.9814285243672735
|
|
|
|
key: test_precision
|
|
value: [0.88 0.75862069 0.82758621 0.79411765 0.83870968 0.71428571
|
|
0.8 0.88 0.76470588 0.875 ]
|
|
|
|
mean value: 0.8133025817668558
|
|
|
|
key: train_precision
|
|
value: [0.98425197 0.97265625 0.98031496 0.97619048 0.996 0.99593496
|
|
0.9765625 0.99203187 0.992 0.98023715]
|
|
|
|
mean value: 0.9846180141334085
|
|
|
|
key: test_recall
|
|
value: [0.78571429 0.78571429 0.82758621 0.93103448 0.92857143 0.71428571
|
|
0.85714286 0.78571429 0.92857143 0.75 ]
|
|
|
|
mean value: 0.8294334975369458
|
|
|
|
key: train_recall
|
|
value: [0.98425197 0.98031496 0.98418972 0.97233202 0.98031496 0.96456693
|
|
0.98425197 0.98031496 0.97637795 0.97637795]
|
|
|
|
mean value: 0.9783293392673742
|
|
|
|
key: test_roc_auc
|
|
value: [0.841133 0.77216749 0.82450739 0.84051724 0.875 0.71428571
|
|
0.82142857 0.83928571 0.82142857 0.82142857]
|
|
|
|
mean value: 0.8171182266009852
|
|
|
|
key: train_roc_auc
|
|
value: [0.98422085 0.97632349 0.98225234 0.97435498 0.98818898 0.98031496
|
|
0.98031496 0.98622047 0.98425197 0.97834646]
|
|
|
|
mean value: 0.9814789455665869
|
|
|
|
key: test_jcc
|
|
value: [0.70967742 0.62857143 0.70588235 0.75 0.78787879 0.55555556
|
|
0.70588235 0.70967742 0.72222222 0.67741935]
|
|
|
|
mean value: 0.6952766893658734
|
|
|
|
key: train_jcc
|
|
value: [0.96899225 0.95402299 0.96511628 0.94980695 0.97647059 0.96078431
|
|
0.96153846 0.97265625 0.96875 0.95752896]
|
|
|
|
mean value: 0.9635667036472684
|
|
|
|
MCC on Blind test: 0.61
|
|
|
|
Accuracy on Blind test: 0.83
|
|
|
|
Model_name: Decision Tree
|
|
Model func: DecisionTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'mcsm_ppi2_affinity',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=169)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', DecisionTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.03688598 0.02955699 0.03395653 0.03542256 0.0288353 0.03235006
|
|
0.03853583 0.0295577 0.02741766 0.03289652]
|
|
|
|
mean value: 0.032541513442993164
|
|
|
|
key: score_time
|
|
value: [0.01212668 0.00929999 0.00903416 0.00894046 0.00887394 0.00895572
|
|
0.0091536 0.00894165 0.00892639 0.00911331]
|
|
|
|
mean value: 0.009336590766906738
|
|
|
|
key: test_mcc
|
|
value: [0.50862069 0.72706729 0.61405719 0.66268617 0.64285714 0.71611487
|
|
0.75047877 0.65814518 0.75047877 0.67900461]
|
|
|
|
mean value: 0.6709510695513776
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.75438596 0.85964912 0.80701754 0.8245614 0.82142857 0.85714286
|
|
0.875 0.82142857 0.875 0.83928571]
|
|
|
|
mean value: 0.8334899749373433
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.75 0.86666667 0.81355932 0.84375 0.82142857 0.85185185
|
|
0.87719298 0.8 0.87719298 0.83636364]
|
|
|
|
mean value: 0.8338006013256906
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.75 0.8125 0.8 0.77142857 0.82142857 0.88461538
|
|
0.86206897 0.90909091 0.86206897 0.85185185]
|
|
|
|
mean value: 0.8325053219449771
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.75 0.92857143 0.82758621 0.93103448 0.82142857 0.82142857
|
|
0.89285714 0.71428571 0.89285714 0.82142857]
|
|
|
|
mean value: 0.8401477832512315
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.75431034 0.86083744 0.80665025 0.8226601 0.82142857 0.85714286
|
|
0.875 0.82142857 0.875 0.83928571]
|
|
|
|
mean value: 0.8333743842364533
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.6 0.76470588 0.68571429 0.72972973 0.6969697 0.74193548
|
|
0.78125 0.66666667 0.78125 0.71875 ]
|
|
|
|
mean value: 0.7166971745304288
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.59
|
|
|
|
Accuracy on Blind test: 0.81
|
|
|
|
Model_name: Extra Trees
|
|
Model func: ExtraTreesClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'mcsm_ppi2_affinity',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=169)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreesClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.13052034 0.12769079 0.12959528 0.1288991 0.13007855 0.1285646
|
|
0.12925696 0.12970114 0.13001776 0.12802267]
|
|
|
|
mean value: 0.1292347192764282
|
|
|
|
key: score_time
|
|
value: [0.01803589 0.01821971 0.01817918 0.01811981 0.02113414 0.0180819
|
|
0.01810694 0.01817536 0.01921439 0.01816344]
|
|
|
|
mean value: 0.018543076515197755
|
|
|
|
key: test_mcc
|
|
value: [0.68472906 0.43842365 0.75462449 0.6746955 0.78772636 0.57142857
|
|
0.75434227 0.6882472 0.68250015 0.53605627]
|
|
|
|
mean value: 0.6572773523500841
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.84210526 0.71929825 0.87719298 0.8245614 0.89285714 0.78571429
|
|
0.875 0.82142857 0.83928571 0.76785714]
|
|
|
|
mean value: 0.82453007518797
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.84210526 0.71428571 0.88135593 0.84848485 0.89655172 0.78571429
|
|
0.88135593 0.7826087 0.84745763 0.76363636]
|
|
|
|
mean value: 0.8243556386594636
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.82758621 0.71428571 0.86666667 0.75675676 0.86666667 0.78571429
|
|
0.83870968 1. 0.80645161 0.77777778]
|
|
|
|
mean value: 0.8240615365087001
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.85714286 0.71428571 0.89655172 0.96551724 0.92857143 0.78571429
|
|
0.92857143 0.64285714 0.89285714 0.75 ]
|
|
|
|
mean value: 0.8362068965517242
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.84236453 0.71921182 0.87684729 0.82204433 0.89285714 0.78571429
|
|
0.875 0.82142857 0.83928571 0.76785714]
|
|
|
|
mean value: 0.8242610837438424
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.72727273 0.55555556 0.78787879 0.73684211 0.8125 0.64705882
|
|
0.78787879 0.64285714 0.73529412 0.61764706]
|
|
|
|
mean value: 0.7050785106706159
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.57
|
|
|
|
Accuracy on Blind test: 0.8
|
|
|
|
Model_name: Extra Tree
|
|
Model func: ExtraTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'mcsm_ppi2_affinity',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=169)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01056218 0.0111351 0.01032853 0.01040602 0.01029873 0.01050901
|
|
0.01047373 0.01108289 0.01096153 0.01055861]
|
|
|
|
mean value: 0.010631632804870606
|
|
|
|
key: score_time
|
|
value: [0.00898886 0.00937748 0.0088644 0.00880075 0.00952053 0.00879717
|
|
0.00898719 0.0097003 0.0089972 0.00896549]
|
|
|
|
mean value: 0.009099936485290528
|
|
|
|
key: test_mcc
|
|
value: [0.34042547 0.19826978 0.51048128 0.40320623 0.67900461 0.32328954
|
|
0.39310793 0.43876345 0.42857143 0.28644595]
|
|
|
|
mean value: 0.4001565656283396
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.66666667 0.59649123 0.75438596 0.70175439 0.83928571 0.66071429
|
|
0.69642857 0.71428571 0.71428571 0.64285714]
|
|
|
|
mean value: 0.6987155388471178
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.68852459 0.62295082 0.75 0.71186441 0.84210526 0.6779661
|
|
0.69090909 0.68 0.71428571 0.62962963]
|
|
|
|
mean value: 0.7008235616292972
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.63636364 0.57575758 0.77777778 0.7 0.82758621 0.64516129
|
|
0.7037037 0.77272727 0.71428571 0.65384615]
|
|
|
|
mean value: 0.7007209331680967
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.75 0.67857143 0.72413793 0.72413793 0.85714286 0.71428571
|
|
0.67857143 0.60714286 0.71428571 0.60714286]
|
|
|
|
mean value: 0.7055418719211822
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.66810345 0.5979064 0.75492611 0.70135468 0.83928571 0.66071429
|
|
0.69642857 0.71428571 0.71428571 0.64285714]
|
|
|
|
mean value: 0.6990147783251232
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.525 0.45238095 0.6 0.55263158 0.72727273 0.51282051
|
|
0.52777778 0.51515152 0.55555556 0.45945946]
|
|
|
|
mean value: 0.5428050079365869
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.26
|
|
|
|
Accuracy on Blind test: 0.67
|
|
|
|
Model_name: Random Forest
|
|
Model func: RandomForestClassifier(n_estimators=1000, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'mcsm_ppi2_affinity',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=169)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(n_estimators=1000, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.92051291 1.89167118 1.92020392 1.93275261 1.92187858 1.92772245
|
|
1.94392252 1.92265916 1.89350748 1.90187287]
|
|
|
|
mean value: 1.9176703691482544
|
|
|
|
key: score_time
|
|
value: [0.09649062 0.09537554 0.09934902 0.09326696 0.09322071 0.09627128
|
|
0.09829021 0.0933094 0.09369445 0.09350157]
|
|
|
|
mean value: 0.09527697563171386
|
|
|
|
key: test_mcc
|
|
value: [0.75492611 0.78940887 0.75492611 0.74822828 0.79385662 0.71611487
|
|
0.85714286 0.77459667 0.75434227 0.61706091]
|
|
|
|
mean value: 0.7560603560730887
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.87719298 0.89473684 0.87719298 0.85964912 0.89285714 0.85714286
|
|
0.92857143 0.875 0.875 0.80357143]
|
|
|
|
mean value: 0.8740914786967419
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.87719298 0.89285714 0.87719298 0.87878788 0.9 0.85185185
|
|
0.92857143 0.85714286 0.88135593 0.78431373]
|
|
|
|
mean value: 0.8729266781817026
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.86206897 0.89285714 0.89285714 0.78378378 0.84375 0.88461538
|
|
0.92857143 1. 0.83870968 0.86956522]
|
|
|
|
mean value: 0.8796778743012783
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.89285714 0.89285714 0.86206897 1. 0.96428571 0.82142857
|
|
0.92857143 0.75 0.92857143 0.71428571]
|
|
|
|
mean value: 0.8754926108374385
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.87746305 0.89470443 0.87746305 0.85714286 0.89285714 0.85714286
|
|
0.92857143 0.875 0.875 0.80357143]
|
|
|
|
mean value: 0.8738916256157636
|
|
|
|
key: train_roc_auc
|
|
value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.78125 0.80645161 0.78125 0.78378378 0.81818182 0.74193548
|
|
0.86666667 0.75 0.78787879 0.64516129]
|
|
|
|
mean value: 0.776255944360783
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.66
|
|
|
|
Accuracy on Blind test: 0.85
|
|
|
|
Model_name: Random Forest2
|
|
Model func: RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'mcsm_ppi2_affinity',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC0...05', 'ZHAC000106'],
|
|
dtype='object', length=169)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10,
|
|
oob_score=True, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.90896702 0.98829269 0.98135185 0.97723031 1.01252842 0.95426917
|
|
0.99308801 1.06436086 1.00643754 0.9748466 ]
|
|
|
|
mean value: 0.9861372470855713
|
|
|
|
key: score_time
|
|
value: [0.26905274 0.24887419 0.28206348 0.25038576 0.25746202 0.25291276
|
|
0.27563763 0.25317621 0.20239544 0.21187234]
|
|
|
|
mean value: 0.25038325786590576
|
|
|
|
key: test_mcc
|
|
value: [0.8951918 0.79161589 0.75492611 0.80685836 0.79385662 0.71428571
|
|
0.93094934 0.76225171 0.78772636 0.68250015]
|
|
|
|
mean value: 0.7920162048354679
|
|
|
|
key: train_mcc
|
|
value: [0.91716593 0.90927764 0.91321465 0.91321465 0.91738682 0.91341414
|
|
0.90945587 0.90945587 0.91732994 0.9213741 ]
|
|
|
|
mean value: 0.9141289628968229
|
|
|
|
key: test_accuracy
|
|
value: [0.94736842 0.89473684 0.87719298 0.89473684 0.89285714 0.85714286
|
|
0.96428571 0.875 0.89285714 0.83928571]
|
|
|
|
mean value: 0.893546365914787
|
|
|
|
key: train_accuracy
|
|
value: [0.95857988 0.95463511 0.9566075 0.9566075 0.95866142 0.95669291
|
|
0.95472441 0.95472441 0.95866142 0.96062992]
|
|
|
|
mean value: 0.957052446846511
|
|
|
|
key: test_fscore
|
|
value: [0.94545455 0.89655172 0.87719298 0.90625 0.9 0.85714286
|
|
0.96551724 0.8627451 0.89655172 0.83018868]
|
|
|
|
mean value: 0.8937594851993214
|
|
|
|
key: train_fscore
|
|
value: [0.95874263 0.95463511 0.95652174 0.95652174 0.95841584 0.95686275
|
|
0.95481336 0.95481336 0.95874263 0.9609375 ]
|
|
|
|
mean value: 0.9571006657707237
|
|
|
|
key: test_precision
|
|
value: [0.96296296 0.86666667 0.89285714 0.82857143 0.84375 0.85714286
|
|
0.93333333 0.95652174 0.86666667 0.88 ]
|
|
|
|
mean value: 0.8888472797331493
|
|
|
|
key: train_precision
|
|
value: [0.95686275 0.95652174 0.95652174 0.95652174 0.96414343 0.953125
|
|
0.95294118 0.95294118 0.95686275 0.95348837]
|
|
|
|
mean value: 0.9559929858916403
|
|
|
|
key: test_recall
|
|
value: [0.92857143 0.92857143 0.86206897 1. 0.96428571 0.85714286
|
|
1. 0.78571429 0.92857143 0.78571429]
|
|
|
|
mean value: 0.904064039408867
|
|
|
|
key: train_recall
|
|
value: [0.96062992 0.95275591 0.95652174 0.95652174 0.95275591 0.96062992
|
|
0.95669291 0.95669291 0.96062992 0.96850394]
|
|
|
|
mean value: 0.9582334816843546
|
|
|
|
key: test_roc_auc
|
|
value: [0.94704433 0.8953202 0.87746305 0.89285714 0.89285714 0.85714286
|
|
0.96428571 0.875 0.89285714 0.83928571]
|
|
|
|
mean value: 0.8934113300492611
|
|
|
|
key: train_roc_auc
|
|
value: [0.95857583 0.95463882 0.95660733 0.95660733 0.95866142 0.95669291
|
|
0.95472441 0.95472441 0.95866142 0.96062992]
|
|
|
|
mean value: 0.9570523793221499
|
|
|
|
key: test_jcc
|
|
value: [0.89655172 0.8125 0.78125 0.82857143 0.81818182 0.75
|
|
0.93333333 0.75862069 0.8125 0.70967742]
|
|
|
|
mean value: 0.8101186413234522
|
|
|
|
key: train_jcc
|
|
value: [0.92075472 0.91320755 0.91666667 0.91666667 0.92015209 0.91729323
|
|
0.91353383 0.91353383 0.92075472 0.92481203]
|
|
|
|
mean value: 0.9177375338050988
|
|
|
|
MCC on Blind test: 0.66
|
|
|
|
Accuracy on Blind test: 0.85
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'mcsm_ppi2_affinity',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=169)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.02425981 0.01027536 0.01034474 0.01049304 0.01042962 0.01040864
|
|
0.01042223 0.01039553 0.01024818 0.01041102]
|
|
|
|
mean value: 0.011768817901611328
|
|
|
|
key: score_time
|
|
value: [0.01377463 0.0090301 0.00905323 0.00901341 0.00887418 0.00894308
|
|
0.00987649 0.0089922 0.00897455 0.00894618]
|
|
|
|
mean value: 0.009547805786132813
|
|
|
|
key: test_mcc
|
|
value: [0.61453202 0.36991397 0.60497779 0.40394089 0.67900461 0.46697379
|
|
0.4645821 0.52174919 0.53605627 0.47187011]
|
|
|
|
mean value: 0.5133600718816802
|
|
|
|
key: train_mcc
|
|
value: [0.578123 0.60371585 0.56022247 0.59190576 0.5894764 0.58529579
|
|
0.57214905 0.59239045 0.5657391 0.59203967]
|
|
|
|
mean value: 0.5831057553248109
|
|
|
|
key: test_accuracy
|
|
value: [0.80701754 0.68421053 0.78947368 0.70175439 0.83928571 0.73214286
|
|
0.73214286 0.75 0.76785714 0.73214286]
|
|
|
|
mean value: 0.7536027568922306
|
|
|
|
key: train_accuracy
|
|
value: [0.78698225 0.80078895 0.7790927 0.79487179 0.79330709 0.79133858
|
|
0.78543307 0.79527559 0.78149606 0.79527559]
|
|
|
|
mean value: 0.7903861684449207
|
|
|
|
key: test_fscore
|
|
value: [0.80701754 0.68965517 0.76 0.70175439 0.83636364 0.74576271
|
|
0.73684211 0.70833333 0.76363636 0.70588235]
|
|
|
|
mean value: 0.7455247605640429
|
|
|
|
key: train_fscore
|
|
value: [0.77405858 0.7926078 0.76859504 0.78512397 0.7826087 0.78099174
|
|
0.77800407 0.78688525 0.77018634 0.7877551 ]
|
|
|
|
mean value: 0.7806816576400366
|
|
|
|
key: test_precision
|
|
value: [0.79310345 0.66666667 0.9047619 0.71428571 0.85185185 0.70967742
|
|
0.72413793 0.85 0.77777778 0.7826087 ]
|
|
|
|
mean value: 0.7774871409661273
|
|
|
|
key: train_precision
|
|
value: [0.82589286 0.82832618 0.80519481 0.82251082 0.82532751 0.82173913
|
|
0.80590717 0.82051282 0.81222707 0.81779661]
|
|
|
|
mean value: 0.8185434984371709
|
|
|
|
key: test_recall
|
|
value: [0.82142857 0.71428571 0.65517241 0.68965517 0.82142857 0.78571429
|
|
0.75 0.60714286 0.75 0.64285714]
|
|
|
|
mean value: 0.7237684729064039
|
|
|
|
key: train_recall
|
|
value: [0.72834646 0.75984252 0.73517787 0.75098814 0.74409449 0.74409449
|
|
0.7519685 0.75590551 0.73228346 0.75984252]
|
|
|
|
mean value: 0.7462543960661043
|
|
|
|
key: test_roc_auc
|
|
value: [0.80726601 0.68472906 0.79187192 0.70197044 0.83928571 0.73214286
|
|
0.73214286 0.75 0.76785714 0.73214286]
|
|
|
|
mean value: 0.7539408866995074
|
|
|
|
key: train_roc_auc
|
|
value: [0.78709813 0.80086988 0.77900626 0.79478541 0.79330709 0.79133858
|
|
0.78543307 0.79527559 0.78149606 0.79527559]
|
|
|
|
mean value: 0.7903885655597398
|
|
|
|
key: test_jcc
|
|
value: [0.67647059 0.52631579 0.61290323 0.54054054 0.71875 0.59459459
|
|
0.58333333 0.5483871 0.61764706 0.54545455]
|
|
|
|
mean value: 0.5964396773036167
|
|
|
|
key: train_jcc
|
|
value: [0.63139932 0.65646259 0.62416107 0.6462585 0.64285714 0.64067797
|
|
0.63666667 0.64864865 0.62626263 0.64983165]
|
|
|
|
mean value: 0.6403226180035451
|
|
|
|
MCC on Blind test: 0.47
|
|
|
|
Accuracy on Blind test: 0.79
|
|
|
|
Model_name: XGBoost
|
|
Model func: XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'mcsm_ppi2_affinity',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC0...
|
|
interaction_constraints=None, learning_rate=None,
|
|
max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan,
|
|
monotone_constraints=None, n_estimators=100,
|
|
n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None,
|
|
reg_lambda=None, scale_pos_weight=None,
|
|
subsample=None, tree_method=None,
|
|
use_label_encoder=False,
|
|
validate_parameters=None, verbosity=0))])
|
|
|
|
key: fit_time
|
|
value: [0.26489162 0.08565187 0.09101462 0.11429906 0.09500623 0.10431981
|
|
0.09754062 0.10011506 0.09461498 0.10357738]
|
|
|
|
mean value: 0.11510312557220459
|
|
|
|
key: score_time
|
|
value: [0.0110383 0.01114631 0.01103783 0.01199245 0.01212549 0.01195836
|
|
0.0121727 0.01191068 0.01198483 0.01108575]
|
|
|
|
mean value: 0.011645269393920899
|
|
|
|
key: test_mcc
|
|
value: [0.79110556 0.82512315 0.8951918 0.77728159 0.83484711 0.82618439
|
|
0.89342711 0.82195294 0.82195294 0.83484711]
|
|
|
|
mean value: 0.832191371439121
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.89473684 0.9122807 0.94736842 0.87719298 0.91071429 0.91071429
|
|
0.94642857 0.91071429 0.91071429 0.91071429]
|
|
|
|
mean value: 0.9131578947368421
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.88888889 0.9122807 0.94915254 0.89230769 0.91803279 0.90566038
|
|
0.94736842 0.90909091 0.90909091 0.90196078]
|
|
|
|
mean value: 0.913383401311576
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.92307692 0.89655172 0.93333333 0.80555556 0.84848485 0.96
|
|
0.93103448 0.92592593 0.92592593 1. ]
|
|
|
|
mean value: 0.9149888719199064
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.85714286 0.92857143 0.96551724 1. 1. 0.85714286
|
|
0.96428571 0.89285714 0.89285714 0.82142857]
|
|
|
|
mean value: 0.9179802955665025
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.89408867 0.91256158 0.94704433 0.875 0.91071429 0.91071429
|
|
0.94642857 0.91071429 0.91071429 0.91071429]
|
|
|
|
mean value: 0.9128694581280788
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.8 0.83870968 0.90322581 0.80555556 0.84848485 0.82758621
|
|
0.9 0.83333333 0.83333333 0.82142857]
|
|
|
|
mean value: 0.8411657332903162
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.7
|
|
|
|
Accuracy on Blind test: 0.87
|
|
|
|
Model_name: LDA
|
|
Model func: LinearDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'mcsm_ppi2_affinity',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=169)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LinearDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.04227829 0.08518744 0.04829073 0.07945228 0.07573318 0.07473159
|
|
0.09422827 0.08977342 0.08366513 0.07632232]
|
|
|
|
mean value: 0.07496626377105713
|
|
|
|
key: score_time
|
|
value: [0.01899529 0.01236749 0.01880217 0.01878977 0.01956773 0.02375746
|
|
0.01924992 0.01905847 0.02270031 0.01882863]
|
|
|
|
mean value: 0.019211721420288087
|
|
|
|
key: test_mcc
|
|
value: [0.65104858 0.47348988 0.66268617 0.47938227 0.68250015 0.57142857
|
|
0.53605627 0.5728919 0.65814518 0.53605627]
|
|
|
|
mean value: 0.5823685246344368
|
|
|
|
key: train_mcc
|
|
value: [0.80809907 0.83068165 0.78409007 0.81088613 0.81142619 0.78483432
|
|
0.80724303 0.79589265 0.8167921 0.8172503 ]
|
|
|
|
mean value: 0.8067195518850142
|
|
|
|
key: test_accuracy
|
|
value: [0.8245614 0.73684211 0.8245614 0.73684211 0.83928571 0.78571429
|
|
0.76785714 0.78571429 0.82142857 0.76785714]
|
|
|
|
mean value: 0.7890664160401002
|
|
|
|
key: train_accuracy
|
|
value: [0.90335306 0.91518738 0.89151874 0.90532544 0.90551181 0.89173228
|
|
0.90354331 0.8976378 0.90748031 0.90748031]
|
|
|
|
mean value: 0.9028770442156269
|
|
|
|
key: test_fscore
|
|
value: [0.82758621 0.72727273 0.84375 0.76190476 0.84745763 0.78571429
|
|
0.77192982 0.77777778 0.83870968 0.76363636]
|
|
|
|
mean value: 0.7945739252301871
|
|
|
|
key: train_fscore
|
|
value: [0.90630975 0.91650485 0.89402697 0.90625 0.90697674 0.89483748
|
|
0.90448343 0.8996139 0.91047619 0.91081594]
|
|
|
|
mean value: 0.9050295261208517
|
|
|
|
key: test_precision
|
|
value: [0.8 0.74074074 0.77142857 0.70588235 0.80645161 0.78571429
|
|
0.75862069 0.80769231 0.76470588 0.77777778]
|
|
|
|
mean value: 0.77190142212062
|
|
|
|
key: train_precision
|
|
value: [0.88104089 0.90421456 0.87218045 0.8957529 0.89312977 0.86988848
|
|
0.8957529 0.88257576 0.88191882 0.87912088]
|
|
|
|
mean value: 0.8855575396927519
|
|
|
|
key: test_recall
|
|
value: [0.85714286 0.71428571 0.93103448 0.82758621 0.89285714 0.78571429
|
|
0.78571429 0.75 0.92857143 0.75 ]
|
|
|
|
mean value: 0.8222906403940886
|
|
|
|
key: train_recall
|
|
value: [0.93307087 0.92913386 0.91699605 0.91699605 0.92125984 0.92125984
|
|
0.91338583 0.91732283 0.94094488 0.94488189]
|
|
|
|
mean value: 0.9255251937381346
|
|
|
|
key: test_roc_auc
|
|
value: [0.82512315 0.7364532 0.8226601 0.73522167 0.83928571 0.78571429
|
|
0.76785714 0.78571429 0.82142857 0.76785714]
|
|
|
|
mean value: 0.7887315270935961
|
|
|
|
key: train_roc_auc
|
|
value: [0.90329433 0.91515981 0.89156889 0.90534842 0.90551181 0.89173228
|
|
0.90354331 0.8976378 0.90748031 0.90748031]
|
|
|
|
mean value: 0.9028757274905854
|
|
|
|
key: test_jcc
|
|
value: [0.70588235 0.57142857 0.72972973 0.61538462 0.73529412 0.64705882
|
|
0.62857143 0.63636364 0.72222222 0.61764706]
|
|
|
|
mean value: 0.660958255664138
|
|
|
|
key: train_jcc
|
|
value: [0.82867133 0.84587814 0.80836237 0.82857143 0.82978723 0.80968858
|
|
0.82562278 0.81754386 0.83566434 0.83623693]
|
|
|
|
mean value: 0.8266026983050965
|
|
|
|
MCC on Blind test: 0.55
|
|
|
|
Accuracy on Blind test: 0.79
|
|
|
|
Model_name: Multinomial
|
|
Model func: MultinomialNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'mcsm_ppi2_affinity',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=169)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MultinomialNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01351833 0.01338243 0.01024508 0.00988245 0.00978446 0.00980473
|
|
0.01015139 0.00987172 0.00992179 0.0109098 ]
|
|
|
|
mean value: 0.010747218132019043
|
|
|
|
key: score_time
|
|
value: [0.01177025 0.00918531 0.00883245 0.00866175 0.00867105 0.0087378
|
|
0.00884843 0.0088172 0.00870466 0.00935793]
|
|
|
|
mean value: 0.009158682823181153
|
|
|
|
key: test_mcc
|
|
value: [0.71921182 0.44418104 0.62473685 0.47348988 0.64285714 0.50128041
|
|
0.53881591 0.52174919 0.61065803 0.4330127 ]
|
|
|
|
mean value: 0.5509992973599899
|
|
|
|
key: train_mcc
|
|
value: [0.55424668 0.56615725 0.54635004 0.55426731 0.58665509 0.55907245
|
|
0.53549948 0.54735015 0.54332393 0.5552257 ]
|
|
|
|
mean value: 0.5548148076158407
|
|
|
|
key: test_accuracy
|
|
value: [0.85964912 0.71929825 0.80701754 0.73684211 0.82142857 0.75
|
|
0.76785714 0.75 0.80357143 0.71428571]
|
|
|
|
mean value: 0.7729949874686717
|
|
|
|
key: train_accuracy
|
|
value: [0.77712032 0.78303748 0.77317554 0.77712032 0.79330709 0.77952756
|
|
0.76771654 0.77362205 0.77165354 0.77755906]
|
|
|
|
mean value: 0.7773839475686841
|
|
|
|
key: test_fscore
|
|
value: [0.85714286 0.73333333 0.79245283 0.74576271 0.82142857 0.75862069
|
|
0.77966102 0.70833333 0.81355932 0.69230769]
|
|
|
|
mean value: 0.7702602358237097
|
|
|
|
key: train_fscore
|
|
value: [0.77712032 0.78174603 0.77227723 0.77534791 0.79207921 0.77865613
|
|
0.76953125 0.77582846 0.77075099 0.77534791]
|
|
|
|
mean value: 0.7768685432684644
|
|
|
|
key: test_precision
|
|
value: [0.85714286 0.6875 0.875 0.73333333 0.82142857 0.73333333
|
|
0.74193548 0.85 0.77419355 0.75 ]
|
|
|
|
mean value: 0.7823867127496159
|
|
|
|
key: train_precision
|
|
value: [0.77865613 0.788 0.77380952 0.78 0.79681275 0.78174603
|
|
0.76356589 0.76833977 0.77380952 0.78313253]
|
|
|
|
mean value: 0.7787872144784396
|
|
|
|
key: test_recall
|
|
value: [0.85714286 0.78571429 0.72413793 0.75862069 0.82142857 0.78571429
|
|
0.82142857 0.60714286 0.85714286 0.64285714]
|
|
|
|
mean value: 0.7661330049261084
|
|
|
|
key: train_recall
|
|
value: [0.77559055 0.77559055 0.77075099 0.77075099 0.78740157 0.77559055
|
|
0.77559055 0.78346457 0.76771654 0.76771654]
|
|
|
|
mean value: 0.775016339360742
|
|
|
|
key: test_roc_auc
|
|
value: [0.85960591 0.72044335 0.80849754 0.7364532 0.82142857 0.75
|
|
0.76785714 0.75 0.80357143 0.71428571]
|
|
|
|
mean value: 0.7732142857142856
|
|
|
|
key: train_roc_auc
|
|
value: [0.77712334 0.78305219 0.77317077 0.77710778 0.79330709 0.77952756
|
|
0.76771654 0.77362205 0.77165354 0.77755906]
|
|
|
|
mean value: 0.777383990538732
|
|
|
|
key: test_jcc
|
|
value: [0.75 0.57894737 0.65625 0.59459459 0.6969697 0.61111111
|
|
0.63888889 0.5483871 0.68571429 0.52941176]
|
|
|
|
mean value: 0.6290274807179705
|
|
|
|
key: train_jcc
|
|
value: [0.63548387 0.64169381 0.62903226 0.63311688 0.6557377 0.63754045
|
|
0.62539683 0.63375796 0.62700965 0.63311688]
|
|
|
|
mean value: 0.6351886297815925
|
|
|
|
MCC on Blind test: 0.46
|
|
|
|
Accuracy on Blind test: 0.78
|
|
|
|
Model_name: Passive Aggresive
|
|
Model func: PassiveAggressiveClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'mcsm_ppi2_affinity',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=169)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
PassiveAggressiveClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01435208 0.02308655 0.01676607 0.0206604 0.02147317 0.0185039
|
|
0.01732731 0.02168465 0.01905084 0.02053761]
|
|
|
|
mean value: 0.019344258308410644
|
|
|
|
key: score_time
|
|
value: [0.01009798 0.01145768 0.01185679 0.01174641 0.01173425 0.01180553
|
|
0.01177812 0.01182508 0.01182938 0.01185131]
|
|
|
|
mean value: 0.01159825325012207
|
|
|
|
key: test_mcc
|
|
value: [0.86189955 0.64889453 0.79110556 0.64517917 0.64450339 0.37300192
|
|
0.68250015 0.47951222 0.61065803 0.67900461]
|
|
|
|
mean value: 0.6416259115390277
|
|
|
|
key: train_mcc
|
|
value: [0.75511243 0.70541654 0.76111265 0.76806178 0.78430759 0.36779872
|
|
0.66597915 0.77991449 0.75330426 0.76786532]
|
|
|
|
mean value: 0.7108872936486214
|
|
|
|
key: test_accuracy
|
|
value: [0.92982456 0.80701754 0.89473684 0.80701754 0.82142857 0.64285714
|
|
0.83928571 0.73214286 0.80357143 0.83928571]
|
|
|
|
mean value: 0.8117167919799498
|
|
|
|
key: train_accuracy
|
|
value: [0.87573964 0.84023669 0.87968442 0.8816568 0.88976378 0.62204724
|
|
0.83267717 0.88976378 0.87401575 0.88385827]
|
|
|
|
mean value: 0.8469443538492599
|
|
|
|
key: test_fscore
|
|
value: [0.93103448 0.83076923 0.9 0.8358209 0.82758621 0.47368421
|
|
0.84745763 0.69387755 0.81355932 0.83636364]
|
|
|
|
mean value: 0.7990153163009694
|
|
|
|
key: train_fscore
|
|
value: [0.88180113 0.85913043 0.8833652 0.88764045 0.89552239 0.39622642
|
|
0.82897384 0.89147287 0.88104089 0.88270378]
|
|
|
|
mean value: 0.8287877394647932
|
|
|
|
key: test_precision
|
|
value: [0.9 0.72972973 0.87096774 0.73684211 0.8 0.9
|
|
0.80645161 0.80952381 0.77419355 0.85185185]
|
|
|
|
mean value: 0.8179560399594356
|
|
|
|
key: train_precision
|
|
value: [0.84229391 0.7694704 0.85555556 0.84341637 0.85106383 0.984375
|
|
0.84773663 0.8778626 0.83450704 0.89156627]
|
|
|
|
mean value: 0.8597847595492023
|
|
|
|
key: test_recall
|
|
value: [0.96428571 0.96428571 0.93103448 0.96551724 0.85714286 0.32142857
|
|
0.89285714 0.60714286 0.85714286 0.82142857]
|
|
|
|
mean value: 0.8182266009852217
|
|
|
|
key: train_recall
|
|
value: [0.92519685 0.97244094 0.91304348 0.93675889 0.94488189 0.2480315
|
|
0.81102362 0.90551181 0.93307087 0.87401575]
|
|
|
|
mean value: 0.8463975599887958
|
|
|
|
key: test_roc_auc
|
|
value: [0.93041872 0.80972906 0.89408867 0.80418719 0.82142857 0.64285714
|
|
0.83928571 0.73214286 0.80357143 0.83928571]
|
|
|
|
mean value: 0.8116995073891626
|
|
|
|
key: train_roc_auc
|
|
value: [0.8756419 0.83997541 0.87975009 0.88176527 0.88976378 0.62204724
|
|
0.83267717 0.88976378 0.87401575 0.88385827]
|
|
|
|
mean value: 0.8469258659861193
|
|
|
|
key: test_jcc
|
|
value: [0.87096774 0.71052632 0.81818182 0.71794872 0.70588235 0.31034483
|
|
0.73529412 0.53125 0.68571429 0.71875 ]
|
|
|
|
mean value: 0.6804860177744222
|
|
|
|
key: train_jcc
|
|
value: [0.7885906 0.75304878 0.79109589 0.7979798 0.81081081 0.24705882
|
|
0.70790378 0.8041958 0.78737542 0.79003559]
|
|
|
|
mean value: 0.7278095293981167
|
|
|
|
MCC on Blind test: 0.59
|
|
|
|
Accuracy on Blind test: 0.8
|
|
|
|
Model_name: Stochastic GDescent
|
|
Model func: SGDClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'mcsm_ppi2_affinity',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=169)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SGDClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02624679 0.0266242 0.02384472 0.02349424 0.02361321 0.03041553
|
|
0.02481651 0.01954269 0.0195632 0.02453136]
|
|
|
|
mean value: 0.024269247055053712
|
|
|
|
key: score_time
|
|
value: [0.01185083 0.01200604 0.01181102 0.01179695 0.0117557 0.01181388
|
|
0.01185155 0.01174688 0.01180363 0.01184297]
|
|
|
|
mean value: 0.011827945709228516
|
|
|
|
key: test_mcc
|
|
value: [0.47914675 0.63486347 0.49766536 0.66345955 0.5728919 0.64450339
|
|
0.68250015 0.64951905 0.70082556 0.53881591]
|
|
|
|
mean value: 0.6064191077109774
|
|
|
|
key: train_mcc
|
|
value: [0.55505227 0.77953591 0.52479166 0.77599818 0.76361418 0.80881089
|
|
0.778255 0.71270828 0.70499098 0.78222041]
|
|
|
|
mean value: 0.7185977744242504
|
|
|
|
key: test_accuracy
|
|
value: [0.70175439 0.80701754 0.70175439 0.80701754 0.78571429 0.82142857
|
|
0.83928571 0.82142857 0.83928571 0.76785714]
|
|
|
|
mean value: 0.7892543859649123
|
|
|
|
key: train_accuracy
|
|
value: [0.74161736 0.88757396 0.72189349 0.88560158 0.87795276 0.9015748
|
|
0.88779528 0.84448819 0.84251969 0.88779528]
|
|
|
|
mean value: 0.8478812374784512
|
|
|
|
key: test_fscore
|
|
value: [0.76056338 0.82539683 0.77333333 0.84057971 0.77777778 0.82758621
|
|
0.84745763 0.83333333 0.85714286 0.77966102]
|
|
|
|
mean value: 0.8122832068375093
|
|
|
|
key: train_fscore
|
|
value: [0.79304897 0.89345794 0.7800312 0.89138577 0.86864407 0.9070632
|
|
0.89224953 0.86212914 0.85915493 0.89463956]
|
|
|
|
mean value: 0.8641804309146348
|
|
|
|
key: test_precision
|
|
value: [0.62790698 0.74285714 0.63043478 0.725 0.80769231 0.8
|
|
0.80645161 0.78125 0.77142857 0.74193548]
|
|
|
|
mean value: 0.7434956878105097
|
|
|
|
key: train_precision
|
|
value: [0.66226913 0.85053381 0.6443299 0.84697509 0.94036697 0.85915493
|
|
0.85818182 0.77429467 0.77707006 0.84320557]
|
|
|
|
mean value: 0.805638195268187
|
|
|
|
key: test_recall
|
|
value: [0.96428571 0.92857143 1. 1. 0.75 0.85714286
|
|
0.89285714 0.89285714 0.96428571 0.82142857]
|
|
|
|
mean value: 0.9071428571428571
|
|
|
|
key: train_recall
|
|
value: [0.98818898 0.94094488 0.98814229 0.94071146 0.80708661 0.96062992
|
|
0.92913386 0.97244094 0.96062992 0.95275591]
|
|
|
|
mean value: 0.9440664778562758
|
|
|
|
key: test_roc_auc
|
|
value: [0.70628079 0.8091133 0.69642857 0.80357143 0.78571429 0.82142857
|
|
0.83928571 0.82142857 0.83928571 0.76785714]
|
|
|
|
mean value: 0.7890394088669951
|
|
|
|
key: train_roc_auc
|
|
value: [0.74113006 0.88746849 0.7224176 0.88571006 0.87795276 0.9015748
|
|
0.88779528 0.84448819 0.84251969 0.88779528]
|
|
|
|
mean value: 0.8478852198811118
|
|
|
|
key: test_jcc
|
|
value: [0.61363636 0.7027027 0.63043478 0.725 0.63636364 0.70588235
|
|
0.73529412 0.71428571 0.75 0.63888889]
|
|
|
|
mean value: 0.6852488559074237
|
|
|
|
key: train_jcc
|
|
value: [0.65706806 0.80743243 0.63938619 0.80405405 0.76779026 0.82993197
|
|
0.80546075 0.75766871 0.75308642 0.80936455]
|
|
|
|
mean value: 0.7631243404291178
|
|
|
|
MCC on Blind test: 0.14
|
|
|
|
Accuracy on Blind test: 0.73
|
|
|
|
Model_name: AdaBoost Classifier
|
|
Model func: AdaBoostClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'mcsm_ppi2_affinity',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=169)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', AdaBoostClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.20187092 0.18617606 0.18697357 0.19007778 0.19041204 0.18814564
|
|
0.19574547 0.20017982 0.19650555 0.20053363]
|
|
|
|
mean value: 0.19366204738616943
|
|
|
|
key: score_time
|
|
value: [0.01539469 0.01526952 0.01546836 0.01585555 0.01569057 0.01600099
|
|
0.01669431 0.01603317 0.01670384 0.0161972 ]
|
|
|
|
mean value: 0.015930819511413574
|
|
|
|
key: test_mcc
|
|
value: [0.72064772 0.8953202 0.85960591 0.58358651 0.75434227 0.79385662
|
|
0.85933785 0.78772636 0.76225171 0.83484711]
|
|
|
|
mean value: 0.7851522264611699
|
|
|
|
key: train_mcc
|
|
value: [0.9606705 0.93734639 0.94878539 0.94480322 0.94514562 0.94499908
|
|
0.93712408 0.93712408 0.94112724 0.9332517 ]
|
|
|
|
mean value: 0.9430377313978728
|
|
|
|
key: test_accuracy
|
|
value: [0.85964912 0.94736842 0.92982456 0.78947368 0.875 0.89285714
|
|
0.92857143 0.89285714 0.875 0.91071429]
|
|
|
|
mean value: 0.8901315789473684
|
|
|
|
key: train_accuracy
|
|
value: [0.98027613 0.96844181 0.97435897 0.97238659 0.97244094 0.97244094
|
|
0.96850394 0.96850394 0.97047244 0.96653543]
|
|
|
|
mean value: 0.9714361148643402
|
|
|
|
key: test_fscore
|
|
value: [0.85185185 0.94736842 0.93103448 0.80645161 0.88135593 0.88461538
|
|
0.93103448 0.88888889 0.8852459 0.90196078]
|
|
|
|
mean value: 0.8909807742985684
|
|
|
|
key: train_fscore
|
|
value: [0.98046875 0.96899225 0.97445972 0.97244094 0.97276265 0.97265625
|
|
0.96875 0.96875 0.97076023 0.9668616 ]
|
|
|
|
mean value: 0.9716902396167861
|
|
|
|
key: test_precision
|
|
value: [0.88461538 0.93103448 0.93103448 0.75757576 0.83870968 0.95833333
|
|
0.9 0.92307692 0.81818182 1. ]
|
|
|
|
mean value: 0.8942561859719813
|
|
|
|
key: train_precision
|
|
value: [0.97286822 0.95419847 0.96875 0.96862745 0.96153846 0.96511628
|
|
0.96124031 0.96124031 0.96138996 0.95752896]
|
|
|
|
mean value: 0.9632498420999285
|
|
|
|
key: test_recall
|
|
value: [0.82142857 0.96428571 0.93103448 0.86206897 0.92857143 0.82142857
|
|
0.96428571 0.85714286 0.96428571 0.82142857]
|
|
|
|
mean value: 0.8935960591133005
|
|
|
|
key: train_recall
|
|
value: [0.98818898 0.98425197 0.98023715 0.97628458 0.98425197 0.98031496
|
|
0.97637795 0.97637795 0.98031496 0.97637795]
|
|
|
|
mean value: 0.9802978432043821
|
|
|
|
key: test_roc_auc
|
|
value: [0.85899015 0.9476601 0.92980296 0.78817734 0.875 0.89285714
|
|
0.92857143 0.89285714 0.875 0.91071429]
|
|
|
|
mean value: 0.8899630541871921
|
|
|
|
key: train_roc_auc
|
|
value: [0.9802605 0.96841057 0.97437055 0.97239426 0.97244094 0.97244094
|
|
0.96850394 0.96850394 0.97047244 0.96653543]
|
|
|
|
mean value: 0.9714333509694688
|
|
|
|
key: test_jcc
|
|
value: [0.74193548 0.9 0.87096774 0.67567568 0.78787879 0.79310345
|
|
0.87096774 0.8 0.79411765 0.82142857]
|
|
|
|
mean value: 0.8056075098059656
|
|
|
|
key: train_jcc
|
|
value: [0.96168582 0.93984962 0.95019157 0.94636015 0.9469697 0.94676806
|
|
0.93939394 0.93939394 0.94318182 0.93584906]
|
|
|
|
mean value: 0.944964368333254
|
|
|
|
MCC on Blind test: 0.68
|
|
|
|
Accuracy on Blind test: 0.86
|
|
|
|
Model_name: Bagging Classifier
|
|
Model func: BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'mcsm_ppi2_affinity',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=169)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
BaggingClassifier(n_jobs=10, oob_score=True,
|
|
random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.07569265 0.08306384 0.10068297 0.09462094 0.09405899 0.09870481
|
|
0.10795307 0.079494 0.09042645 0.0916388 ]
|
|
|
|
mean value: 0.09163365364074708
|
|
|
|
key: score_time
|
|
value: [0.02257919 0.03819609 0.03938317 0.02528214 0.02493191 0.03076863
|
|
0.02569342 0.03798985 0.02238894 0.02226901]
|
|
|
|
mean value: 0.028948235511779784
|
|
|
|
key: test_mcc
|
|
value: [0.71921182 0.78940887 0.78940887 0.77728159 0.79385662 0.78772636
|
|
0.75047877 0.78772636 0.78571429 0.82618439]
|
|
|
|
mean value: 0.7806997936854042
|
|
|
|
key: train_mcc
|
|
value: [0.97660594 0.98422085 0.97239426 0.99606293 0.98038334 0.99212598
|
|
0.97244848 0.98032256 0.98437404 0.98032256]
|
|
|
|
mean value: 0.981926094165461
|
|
|
|
key: test_accuracy
|
|
value: [0.85964912 0.89473684 0.89473684 0.87719298 0.89285714 0.89285714
|
|
0.875 0.89285714 0.89285714 0.91071429]
|
|
|
|
mean value: 0.8883458646616541
|
|
|
|
key: train_accuracy
|
|
value: [0.98816568 0.99211045 0.98619329 0.99802761 0.99015748 0.99606299
|
|
0.98622047 0.99015748 0.99212598 0.99015748]
|
|
|
|
mean value: 0.9909378931183898
|
|
|
|
key: test_fscore
|
|
value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
[0.85714286 0.89285714 0.89655172 0.89230769 0.9 0.88888889
|
|
0.87272727 0.88888889 0.89285714 0.90566038]
|
|
|
|
mean value: 0.8887881987166307
|
|
|
|
key: train_fscore
|
|
value: [0.98804781 0.99212598 0.98619329 0.9980198 0.99009901 0.99606299
|
|
0.98624754 0.99013807 0.9921875 0.99013807]
|
|
|
|
mean value: 0.9909260069236293
|
|
|
|
key: test_precision
|
|
value: [0.85714286 0.89285714 0.89655172 0.80555556 0.84375 0.92307692
|
|
0.88888889 0.92307692 0.89285714 0.96 ]
|
|
|
|
mean value: 0.8883757157593365
|
|
|
|
key: train_precision
|
|
value: [1. 0.99212598 0.98425197 1. 0.99601594 0.99606299
|
|
0.98431373 0.99209486 0.98449612 0.99209486]
|
|
|
|
mean value: 0.9921456453978232
|
|
|
|
key: test_recall
|
|
value: [0.85714286 0.89285714 0.89655172 1. 0.96428571 0.85714286
|
|
0.85714286 0.85714286 0.89285714 0.85714286]
|
|
|
|
mean value: 0.8932266009852217
|
|
|
|
key: train_recall
|
|
value: [0.97637795 0.99212598 0.98814229 0.99604743 0.98425197 0.99606299
|
|
0.98818898 0.98818898 1. 0.98818898]
|
|
|
|
mean value: 0.9897575550091812
|
|
|
|
key: test_roc_auc
|
|
value: [0.85960591 0.89470443 0.89470443 0.875 0.89285714 0.89285714
|
|
0.875 0.89285714 0.89285714 0.91071429]
|
|
|
|
mean value: 0.888115763546798
|
|
|
|
key: train_roc_auc
|
|
value: [0.98818898 0.99211042 0.98619713 0.99802372 0.99015748 0.99606299
|
|
0.98622047 0.99015748 0.99212598 0.99015748]
|
|
|
|
mean value: 0.9909402135009804
|
|
|
|
key: test_jcc
|
|
value: [0.75 0.80645161 0.8125 0.80555556 0.81818182 0.8
|
|
0.77419355 0.8 0.80645161 0.82758621]
|
|
|
|
mean value: 0.8000920354827474
|
|
|
|
key: train_jcc
|
|
value: [0.97637795 0.984375 0.97276265 0.99604743 0.98039216 0.99215686
|
|
0.97286822 0.98046875 0.98449612 0.98046875]
|
|
|
|
mean value: 0.9820413890193457
|
|
|
|
MCC on Blind test: 0.72
|
|
|
|
Accuracy on Blind test: 0.87
|
|
|
|
Model_name: Gaussian Process
|
|
Model func: GaussianProcessClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'mcsm_ppi2_affinity',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=169)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianProcessClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.1569252 0.17710876 0.11801124 0.17078733 0.14464998 0.16066098
|
|
0.1587491 0.15894175 0.17035389 0.15861654]
|
|
|
|
mean value: 0.15748047828674316
|
|
|
|
key: score_time
|
|
value: [0.0252955 0.02528405 0.01534581 0.02544618 0.02482653 0.02480364
|
|
0.02481937 0.02474475 0.02482033 0.02477956]
|
|
|
|
mean value: 0.024016571044921876
|
|
|
|
key: test_mcc
|
|
value: [0.54592083 0.26576203 0.64901478 0.61405719 0.67900461 0.60753044
|
|
0.60753044 0.47951222 0.5 0.4330127 ]
|
|
|
|
mean value: 0.5381345227911679
|
|
|
|
key: train_mcc
|
|
value: [0.97239383 0.95661511 0.97239383 0.96847134 0.96850394 0.9606597
|
|
0.96850394 0.96062992 0.96463421 0.95670033]
|
|
|
|
mean value: 0.9649506153761174
|
|
|
|
key: test_accuracy
|
|
value: [0.77192982 0.63157895 0.8245614 0.80701754 0.83928571 0.80357143
|
|
0.80357143 0.73214286 0.75 0.71428571]
|
|
|
|
mean value: 0.7677944862155388
|
|
|
|
key: train_accuracy
|
|
value: [0.98619329 0.97830375 0.98619329 0.98422091 0.98425197 0.98031496
|
|
0.98425197 0.98031496 0.98228346 0.97834646]
|
|
|
|
mean value: 0.982467502213111
|
|
|
|
key: test_fscore
|
|
value: [0.75471698 0.6440678 0.82758621 0.81355932 0.84210526 0.80701754
|
|
0.80701754 0.69387755 0.75 0.69230769]
|
|
|
|
mean value: 0.7632255900877989
|
|
|
|
key: train_fscore
|
|
value: [0.98624754 0.97830375 0.98613861 0.98412698 0.98425197 0.98023715
|
|
0.98425197 0.98031496 0.98217822 0.978389 ]
|
|
|
|
mean value: 0.9824440157372347
|
|
|
|
key: test_precision
|
|
value: [0.8 0.61290323 0.82758621 0.8 0.82758621 0.79310345
|
|
0.79310345 0.80952381 0.75 0.75 ]
|
|
|
|
mean value: 0.7763806345675088
|
|
|
|
key: train_precision
|
|
value: [0.98431373 0.98023715 0.98809524 0.98804781 0.98425197 0.98412698
|
|
0.98425197 0.98031496 0.98804781 0.97647059]
|
|
|
|
mean value: 0.9838158205265586
|
|
|
|
key: test_recall
|
|
value: [0.71428571 0.67857143 0.82758621 0.82758621 0.85714286 0.82142857
|
|
0.82142857 0.60714286 0.75 0.64285714]
|
|
|
|
mean value: 0.7548029556650246
|
|
|
|
key: train_recall
|
|
value: [0.98818898 0.97637795 0.98418972 0.98023715 0.98425197 0.97637795
|
|
0.98425197 0.98031496 0.97637795 0.98031496]
|
|
|
|
mean value: 0.9810883570383742
|
|
|
|
key: test_roc_auc
|
|
value: [0.77093596 0.63238916 0.82450739 0.80665025 0.83928571 0.80357143
|
|
0.80357143 0.73214286 0.75 0.71428571]
|
|
|
|
mean value: 0.7677339901477832
|
|
|
|
key: train_roc_auc
|
|
value: [0.98618935 0.97830755 0.98618935 0.98421307 0.98425197 0.98031496
|
|
0.98425197 0.98031496 0.98228346 0.97834646]
|
|
|
|
mean value: 0.9824663097942797
|
|
|
|
key: test_jcc
|
|
value: [0.60606061 0.475 0.70588235 0.68571429 0.72727273 0.67647059
|
|
0.67647059 0.53125 0.6 0.52941176]
|
|
|
|
mean value: 0.6213532913165266
|
|
|
|
key: train_jcc
|
|
value: [0.97286822 0.95752896 0.97265625 0.96875 0.96899225 0.96124031
|
|
0.96899225 0.96138996 0.96498054 0.95769231]
|
|
|
|
mean value: 0.9655091044614122
|
|
|
|
MCC on Blind test: 0.45
|
|
|
|
Accuracy on Blind test: 0.73
|
|
|
|
Model_name: Gradient Boosting
|
|
Model func: GradientBoostingClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'mcsm_ppi2_affinity',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=169)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GradientBoostingClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.78226233 0.77418399 0.77554059 0.77671647 0.78123641 0.77947402
|
|
0.77357435 0.77042961 0.76739311 0.7736764 ]
|
|
|
|
mean value: 0.7754487276077271
|
|
|
|
key: score_time
|
|
value: [0.00960779 0.00938892 0.0097959 0.00947571 0.01026034 0.00963187
|
|
0.00933957 0.00933719 0.00948882 0.00936842]
|
|
|
|
mean value: 0.009569454193115234
|
|
|
|
key: test_mcc
|
|
value: [0.75492611 0.79161589 0.8951918 0.73477227 0.83484711 0.78772636
|
|
0.89342711 0.75434227 0.78571429 0.8660254 ]
|
|
|
|
mean value: 0.8098588609769577
|
|
|
|
key: train_mcc
|
|
value: [0.99606293 1. 1. 1. 1. 1.
|
|
0.98825791 1. 0.99607071 1. ]
|
|
|
|
mean value: 0.998039155260922
|
|
|
|
key: test_accuracy
|
|
value: [0.87719298 0.89473684 0.94736842 0.85964912 0.91071429 0.89285714
|
|
0.94642857 0.875 0.89285714 0.92857143]
|
|
|
|
mean value: 0.9025375939849624
|
|
|
|
key: train_accuracy
|
|
value: [0.99802761 1. 1. 1. 1. 1.
|
|
0.99409449 1. 0.9980315 1. ]
|
|
|
|
mean value: 0.9990153597664198
|
|
|
|
key: test_fscore
|
|
value: [0.87719298 0.89655172 0.94915254 0.875 0.91803279 0.88888889
|
|
0.94736842 0.86792453 0.89285714 0.92307692]
|
|
|
|
mean value: 0.9036045940029672
|
|
|
|
key: train_fscore
|
|
value: [0.99803536 1. 1. 1. 1. 1.
|
|
0.99412916 1. 0.99803536 1. ]
|
|
|
|
mean value: 0.9990199885428241
|
|
|
|
key: test_precision
|
|
value: [0.86206897 0.86666667 0.93333333 0.8 0.84848485 0.92307692
|
|
0.93103448 0.92 0.89285714 1. ]
|
|
|
|
mean value: 0.8977522362694776
|
|
|
|
key: train_precision
|
|
value: [0.99607843 1. 1. 1. 1. 1.
|
|
0.98832685 1. 0.99607843 1. ]
|
|
|
|
mean value: 0.9980483710994126
|
|
|
|
key: test_recall
|
|
value: [0.89285714 0.92857143 0.96551724 0.96551724 1. 0.85714286
|
|
0.96428571 0.82142857 0.89285714 0.85714286]
|
|
|
|
mean value: 0.9145320197044335
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.87746305 0.8953202 0.94704433 0.85775862 0.91071429 0.89285714
|
|
0.94642857 0.875 0.89285714 0.92857143]
|
|
|
|
mean value: 0.9024014778325123
|
|
|
|
key: train_roc_auc
|
|
value: [0.99802372 1. 1. 1. 1. 1.
|
|
0.99409449 1. 0.9980315 1. ]
|
|
|
|
mean value: 0.9990149699666988
|
|
|
|
key: test_jcc
|
|
value: [0.78125 0.8125 0.90322581 0.77777778 0.84848485 0.8
|
|
0.9 0.76666667 0.80645161 0.85714286]
|
|
|
|
mean value: 0.8253499569426989
|
|
|
|
key: train_jcc
|
|
value: [0.99607843 1. 1. 1. 1. 1.
|
|
0.98832685 1. 0.99607843 1. ]
|
|
|
|
mean value: 0.9980483710994126
|
|
|
|
MCC on Blind test: 0.7
|
|
|
|
Accuracy on Blind test: 0.86
|
|
|
|
Model_name: QDA
|
|
Model func: QuadraticDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'mcsm_ppi2_affinity',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=169)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', QuadraticDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.03669214 0.03119016 0.03197002 0.03223586 0.03141832 0.03137159
|
|
0.03189373 0.03183317 0.03151441 0.03194046]
|
|
|
|
mean value: 0.03220598697662354
|
|
|
|
key: score_time
|
|
value: [0.01264286 0.01253939 0.01334548 0.01338148 0.01347089 0.01336074
|
|
0.01353168 0.01340079 0.01341271 0.0134902 ]
|
|
|
|
mean value: 0.013257622718811035
|
|
|
|
key: test_mcc
|
|
value: [0.34077863 0.3198163 0.22934666 0.41150331 0.60753044 0.36291503
|
|
0.49030429 0.34035165 0.28226724 0.16495722]
|
|
|
|
mean value: 0.35497707539948586
|
|
|
|
key: train_mcc
|
|
value: [0.6499903 0.55878204 0.91750484 0.84140501 0.92554839 0.75680606
|
|
0.79975164 0.57128868 0.5925064 0.60767774]
|
|
|
|
mean value: 0.7221261101497335
|
|
|
|
key: test_accuracy
|
|
value: [0.66666667 0.63157895 0.61403509 0.70175439 0.80357143 0.66071429
|
|
0.73214286 0.625 0.625 0.57142857]
|
|
|
|
mean value: 0.663189223057644
|
|
|
|
key: train_accuracy
|
|
value: [0.79684418 0.73767258 0.95857988 0.91518738 0.96259843 0.86417323
|
|
0.89173228 0.74606299 0.75984252 0.76968504]
|
|
|
|
mean value: 0.8402378511857616
|
|
|
|
key: test_fscore
|
|
value: [0.6122449 0.46153846 0.60714286 0.67924528 0.80701754 0.55813953
|
|
0.68085106 0.43243243 0.51162791 0.42857143]
|
|
|
|
mean value: 0.5778811410213133
|
|
|
|
key: train_fscore
|
|
value: [0.74567901 0.64533333 0.95791583 0.90752688 0.96207585 0.8428246
|
|
0.87964989 0.65963061 0.68393782 0.70076726]
|
|
|
|
mean value: 0.798534109344518
|
|
|
|
key: test_precision
|
|
value: [0.71428571 0.81818182 0.62962963 0.75 0.79310345 0.8
|
|
0.84210526 0.88888889 0.73333333 0.64285714]
|
|
|
|
mean value: 0.7612385238610284
|
|
|
|
key: train_precision
|
|
value: [1. 1. 0.97154472 0.99528302 0.9757085 1.
|
|
0.99014778 1. 1. 1. ]
|
|
|
|
mean value: 0.9932684019590602
|
|
|
|
key: test_recall
|
|
value: [0.53571429 0.32142857 0.5862069 0.62068966 0.82142857 0.42857143
|
|
0.57142857 0.28571429 0.39285714 0.32142857]
|
|
|
|
mean value: 0.48854679802955664
|
|
|
|
key: train_recall
|
|
value: [0.59448819 0.47637795 0.94466403 0.83399209 0.9488189 0.72834646
|
|
0.79133858 0.49212598 0.51968504 0.53937008]
|
|
|
|
mean value: 0.6869207307584576
|
|
|
|
key: test_roc_auc
|
|
value: [0.66440887 0.62623153 0.61453202 0.70320197 0.80357143 0.66071429
|
|
0.73214286 0.625 0.625 0.57142857]
|
|
|
|
mean value: 0.6626231527093596
|
|
|
|
key: train_roc_auc
|
|
value: [0.79724409 0.73818898 0.95855249 0.91502754 0.96259843 0.86417323
|
|
0.89173228 0.74606299 0.75984252 0.76968504]
|
|
|
|
mean value: 0.8403107590800162
|
|
|
|
key: test_jcc
|
|
value: [0.44117647 0.3 0.43589744 0.51428571 0.67647059 0.38709677
|
|
0.51612903 0.27586207 0.34375 0.27272727]
|
|
|
|
mean value: 0.41633953571510823
|
|
|
|
key: train_jcc
|
|
value: [0.59448819 0.47637795 0.91923077 0.83070866 0.92692308 0.72834646
|
|
0.78515625 0.49212598 0.51968504 0.53937008]
|
|
|
|
mean value: 0.6812412458358571
|
|
|
|
MCC on Blind test: 0.01
|
|
|
|
Accuracy on Blind test: 0.64
|
|
|
|
Model_name: Ridge Classifier
|
|
Model func: RidgeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'mcsm_ppi2_affinity',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=169)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.0312562 0.03853512 0.03836489 0.03848076 0.03837538 0.03839111
|
|
0.03839993 0.03830624 0.03794885 0.03855133]
|
|
|
|
mean value: 0.037660980224609376
|
|
|
|
key: score_time
|
|
value: [0.018785 0.01887298 0.01868081 0.0187645 0.01875949 0.01886082
|
|
0.0188055 0.01875305 0.01884317 0.02204394]
|
|
|
|
mean value: 0.019116926193237304
|
|
|
|
key: test_mcc
|
|
value: [0.75492611 0.61805122 0.72064772 0.59060008 0.64450339 0.5728919
|
|
0.75434227 0.65814518 0.60753044 0.5728919 ]
|
|
|
|
mean value: 0.6494530207608915
|
|
|
|
key: train_mcc
|
|
value: [0.75774778 0.77730083 0.76527695 0.78409007 0.78149236 0.77878997
|
|
0.77733354 0.77405686 0.79405713 0.80015219]
|
|
|
|
mean value: 0.77902976780346
|
|
|
|
key: test_accuracy
|
|
value: [0.87719298 0.80701754 0.85964912 0.78947368 0.82142857 0.78571429
|
|
0.875 0.82142857 0.80357143 0.78571429]
|
|
|
|
mean value: 0.8226190476190476
|
|
|
|
key: train_accuracy
|
|
value: [0.87771203 0.88757396 0.8816568 0.89151874 0.88976378 0.88779528
|
|
0.88779528 0.88582677 0.89566929 0.8976378 ]
|
|
|
|
mean value: 0.8882949727437917
|
|
|
|
key: test_fscore
|
|
value: [0.87719298 0.81355932 0.86666667 0.8125 0.82758621 0.79310345
|
|
0.88135593 0.8 0.80701754 0.79310345]
|
|
|
|
mean value: 0.827208555066802
|
|
|
|
key: train_fscore
|
|
value: /home/tanu/git/LSHTM_analysis/scripts/ml/./rpob_cd_sl.py:176: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
rus_CT.sort_values(by = ['test_mcc'], ascending = False, inplace = True)
|
|
/home/tanu/git/LSHTM_analysis/scripts/ml/./rpob_cd_sl.py:179: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
rus_BT.sort_values(by = ['bts_mcc'], ascending = False, inplace = True)
|
|
[0.88257576 0.89184061 0.88549618 0.89402697 0.89353612 0.89265537
|
|
0.89142857 0.89015152 0.89981096 0.90298507]
|
|
|
|
mean value: 0.8924507137139092
|
|
|
|
key: test_precision
|
|
value: [0.86206897 0.77419355 0.83870968 0.74285714 0.8 0.76666667
|
|
0.83870968 0.90909091 0.79310345 0.76666667]
|
|
|
|
mean value: 0.8092066702300296
|
|
|
|
key: train_precision
|
|
value: [0.85036496 0.86080586 0.85608856 0.87218045 0.86397059 0.85559567
|
|
0.86346863 0.85766423 0.86545455 0.85815603]
|
|
|
|
mean value: 0.8603749534514598
|
|
|
|
key: test_recall
|
|
value: [0.89285714 0.85714286 0.89655172 0.89655172 0.85714286 0.82142857
|
|
0.92857143 0.71428571 0.82142857 0.82142857]
|
|
|
|
mean value: 0.8507389162561576
|
|
|
|
key: train_recall
|
|
value: [0.91732283 0.92519685 0.91699605 0.91699605 0.92519685 0.93307087
|
|
0.92125984 0.92519685 0.93700787 0.95275591]
|
|
|
|
mean value: 0.9270999968877408
|
|
|
|
key: test_roc_auc
|
|
value: [0.87746305 0.80788177 0.85899015 0.78756158 0.82142857 0.78571429
|
|
0.875 0.82142857 0.80357143 0.78571429]
|
|
|
|
mean value: 0.822475369458128
|
|
|
|
key: train_roc_auc
|
|
value: [0.87763375 0.88749961 0.88172637 0.89156889 0.88976378 0.88779528
|
|
0.88779528 0.88582677 0.89566929 0.8976378 ]
|
|
|
|
mean value: 0.888291680931188
|
|
|
|
key: test_jcc
|
|
value: [0.78125 0.68571429 0.76470588 0.68421053 0.70588235 0.65714286
|
|
0.78787879 0.66666667 0.67647059 0.65714286]
|
|
|
|
mean value: 0.7067064804390656
|
|
|
|
key: train_jcc
|
|
value: [0.78983051 0.80479452 0.79452055 0.80836237 0.80756014 0.80612245
|
|
0.80412371 0.80204778 0.81786942 0.82312925]
|
|
|
|
mean value: 0.8058360693160755
|
|
|
|
MCC on Blind test: 0.62
|
|
|
|
Accuracy on Blind test: 0.83
|
|
|
|
Model_name: Ridge ClassifierCV
|
|
Model func: RidgeClassifierCV(cv=10)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'mcsm_ppi2_affinity',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=169)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifierCV(cv=10))])
|
|
|
|
key: fit_time
|
|
value: [0.27312136 0.39750671 0.25851822 0.33367014 0.34442902 0.38743472
|
|
0.31063032 0.337183 0.31477523 0.14656138]
|
|
|
|
mean value: 0.3103830099105835
|
|
|
|
key: score_time
|
|
value: [0.01873636 0.02595735 0.01870203 0.01873875 0.02192521 0.0197928
|
|
0.01873064 0.02281976 0.01211023 0.01887131]
|
|
|
|
mean value: 0.019638442993164064
|
|
|
|
key: test_mcc
|
|
value: [0.75492611 0.61805122 0.72064772 0.6746955 0.64951905 0.60753044
|
|
0.79385662 0.61706091 0.67900461 0.60753044]
|
|
|
|
mean value: 0.6722822613093065
|
|
|
|
key: train_mcc
|
|
value: [0.70608376 0.73027603 0.73827912 0.72121611 0.73135099 0.74716398
|
|
0.7271421 0.73456416 0.7308486 0.74202305]
|
|
|
|
mean value: 0.730894790513989
|
|
|
|
key: test_accuracy
|
|
value: [0.87719298 0.80701754 0.85964912 0.8245614 0.82142857 0.80357143
|
|
0.89285714 0.80357143 0.83928571 0.80357143]
|
|
|
|
mean value: 0.8332706766917293
|
|
|
|
key: train_accuracy
|
|
value: [0.85207101 0.86390533 0.8678501 0.85996055 0.86417323 0.87204724
|
|
0.86220472 0.86614173 0.86417323 0.87007874]
|
|
|
|
mean value: 0.8642605879886316
|
|
|
|
key: test_fscore
|
|
value: [0.87719298 0.81355932 0.86666667 0.84848485 0.83333333 0.80701754
|
|
0.9 0.78431373 0.84210526 0.80701754]
|
|
|
|
mean value: 0.8379691229342277
|
|
|
|
key: train_fscore
|
|
value: [0.85768501 0.86956522 0.87286528 0.86372361 0.8700565 0.87758945
|
|
0.86792453 0.87121212 0.86956522 0.87452471]
|
|
|
|
mean value: 0.8694711643236573
|
|
|
|
key: test_precision
|
|
value: [0.86206897 0.77419355 0.83870968 0.75675676 0.78125 0.79310345
|
|
0.84375 0.86956522 0.82758621 0.79310345]
|
|
|
|
mean value: 0.814008726892003
|
|
|
|
key: train_precision
|
|
value: [0.82783883 0.83636364 0.83941606 0.83955224 0.83393502 0.84115523
|
|
0.83333333 0.83941606 0.83636364 0.84558824]
|
|
|
|
mean value: 0.8372962277495424
|
|
|
|
key: test_recall
|
|
value: [0.89285714 0.85714286 0.89655172 0.96551724 0.89285714 0.82142857
|
|
0.96428571 0.71428571 0.85714286 0.82142857]
|
|
|
|
mean value: 0.8683497536945812
|
|
|
|
key: train_recall
|
|
value: [0.88976378 0.90551181 0.90909091 0.88932806 0.90944882 0.91732283
|
|
0.90551181 0.90551181 0.90551181 0.90551181]
|
|
|
|
mean value: 0.9042513460520992
|
|
|
|
key: test_roc_auc
|
|
value: [0.87746305 0.80788177 0.85899015 0.82204433 0.82142857 0.80357143
|
|
0.89285714 0.80357143 0.83928571 0.80357143]
|
|
|
|
mean value: 0.8330665024630541
|
|
|
|
key: train_roc_auc
|
|
value: [0.85199651 0.8638231 0.86793128 0.86001836 0.86417323 0.87204724
|
|
0.86220472 0.86614173 0.86417323 0.87007874]
|
|
|
|
mean value: 0.8642588154741527
|
|
|
|
key: test_jcc
|
|
value: [0.78125 0.68571429 0.76470588 0.73684211 0.71428571 0.67647059
|
|
0.81818182 0.64516129 0.72727273 0.67647059]
|
|
|
|
mean value: 0.7226354999863813
|
|
|
|
key: train_jcc
|
|
value: [0.75083056 0.76923077 0.77441077 0.76013514 0.77 0.78187919
|
|
0.76666667 0.77181208 0.76923077 0.77702703]
|
|
|
|
mean value: 0.769122298165298
|
|
|
|
MCC on Blind test: 0.66
|
|
|
|
Accuracy on Blind test: 0.84
|
|
|
|
Model_name: Logistic Regression
|
|
Model func: LogisticRegression(random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'mcsm_ppi2_affinity',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=169)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegression(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.04760361 0.05070686 0.04906511 0.05312705 0.04915905 0.05322313
|
|
0.05638933 0.04717588 0.05568767 0.04720044]
|
|
|
|
mean value: 0.05093381404876709
|
|
|
|
key: score_time
|
|
value: [0.01261592 0.01258874 0.01246285 0.01327252 0.01331091 0.0132916
|
|
0.01342154 0.01503658 0.01338744 0.01348686]
|
|
|
|
mean value: 0.013287496566772462
|
|
|
|
key: test_mcc
|
|
value: [0.66232872 0.753339 0.79114682 0.78337127 0.65351642 0.86955046
|
|
0.73952007 0.75312909 0.77800131 0.86872191]
|
|
|
|
mean value: 0.7652625078092732
|
|
|
|
key: train_mcc
|
|
value: [0.79479219 0.79967908 0.79507496 0.78663603 0.80660591 0.78481892
|
|
0.78911947 0.80140524 0.78940671 0.78388685]
|
|
|
|
mean value: 0.793142535496795
|
|
|
|
key: test_accuracy
|
|
value: [0.83006536 0.87581699 0.89542484 0.88888889 0.82352941 0.93464052
|
|
0.8627451 0.87581699 0.88815789 0.93421053]
|
|
|
|
mean value: 0.8809296525627796
|
|
|
|
key: train_accuracy
|
|
value: [0.89672727 0.89890909 0.89672727 0.89236364 0.90254545 0.89163636
|
|
0.89381818 0.89963636 0.89389535 0.89098837]
|
|
|
|
mean value: 0.8957247357293869
|
|
|
|
key: test_fscore
|
|
value: [0.83544304 0.87898089 0.8961039 0.89440994 0.83636364 0.93589744
|
|
0.87573964 0.88050314 0.89171975 0.93333333]
|
|
|
|
mean value: 0.8858494704128361
|
|
|
|
key: train_fscore
|
|
value: [0.89971751 0.90231904 0.9 0.89606742 0.90536723 0.89484827
|
|
0.89689266 0.90308989 0.8971831 0.89466292]
|
|
|
|
mean value: 0.8990148039708317
|
|
|
|
key: test_precision
|
|
value: [0.80487805 0.85185185 0.88461538 0.84705882 0.78409091 0.92405063
|
|
0.80434783 0.85365854 0.86419753 0.94594595]
|
|
|
|
mean value: 0.8564695490261903
|
|
|
|
key: train_precision
|
|
value: [0.875 0.87346939 0.87295082 0.86684783 0.87928669 0.86849315
|
|
0.87105624 0.8724559 0.87021858 0.86548913]
|
|
|
|
mean value: 0.8715267731703646
|
|
|
|
key: test_recall
|
|
value: [0.86842105 0.90789474 0.90789474 0.94736842 0.8961039 0.94805195
|
|
0.96103896 0.90909091 0.92105263 0.92105263]
|
|
|
|
mean value: 0.918796992481203
|
|
|
|
key: train_recall
|
|
value: [0.92587209 0.93313953 0.92877907 0.92732558 0.93304221 0.92285298
|
|
0.92430859 0.93595342 0.92587209 0.92587209]
|
|
|
|
mean value: 0.9283017670356454
|
|
|
|
key: test_roc_auc
|
|
value: [0.83031442 0.87602529 0.89550581 0.88926863 0.82305195 0.93455229
|
|
0.86209843 0.87559809 0.88815789 0.93421053]
|
|
|
|
mean value: 0.8808783321941217
|
|
|
|
key: train_roc_auc
|
|
value: [0.89670606 0.89888418 0.89670395 0.89233819 0.90256762 0.89165905
|
|
0.89384034 0.89966276 0.89389535 0.89098837]
|
|
|
|
mean value: 0.8957245861683761
|
|
|
|
key: test_jcc
|
|
value: [0.7173913 0.78409091 0.81176471 0.80898876 0.71875 0.87951807
|
|
0.77894737 0.78651685 0.8045977 0.875 ]
|
|
|
|
mean value: 0.7965565679158251
|
|
|
|
key: train_jcc
|
|
value: [0.81771502 0.82202305 0.81818182 0.81170483 0.82709677 0.80970626
|
|
0.81306018 0.82330346 0.81353768 0.8094028 ]
|
|
|
|
mean value: 0.8165731858989645
|
|
|
|
MCC on Blind test: 0.6
|
|
|
|
Accuracy on Blind test: 0.81
|
|
|
|
Model_name: Logistic RegressionCV
|
|
Model func: LogisticRegressionCV(random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'mcsm_ppi2_affinity',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=169)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegressionCV(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.22887707 1.1061213 1.29676914 1.14623189 1.321275 1.24837685
|
|
1.16393399 1.32340837 1.12328601 1.25860143]
|
|
|
|
mean value: 1.2216881036758422
|
|
|
|
key: score_time
|
|
value: [0.01485205 0.01499057 0.01504302 0.01520014 0.01510501 0.01824522
|
|
0.01551437 0.01551175 0.01546621 0.01540995]
|
|
|
|
mean value: 0.015533828735351562
|
|
|
|
key: test_mcc
|
|
value: [0.68854091 0.753339 0.76582319 0.78337127 0.67765079 0.86955046
|
|
0.72432484 0.75213861 0.76342228 0.89597867]
|
|
|
|
mean value: 0.7674140019235717
|
|
|
|
key: train_mcc
|
|
value: [0.81772124 0.81658005 0.82192099 0.8122771 0.82905646 0.80037823
|
|
0.81179518 0.81243472 0.80798524 0.79912156]
|
|
|
|
mean value: 0.8129270770012365
|
|
|
|
key: test_accuracy
|
|
value: [0.84313725 0.87581699 0.88235294 0.88888889 0.83660131 0.93464052
|
|
0.85620915 0.87581699 0.88157895 0.94736842]
|
|
|
|
mean value: 0.8822411420708635
|
|
|
|
key: train_accuracy
|
|
value: [0.90836364 0.90763636 0.91054545 0.90545455 0.91418182 0.89963636
|
|
0.90545455 0.90545455 0.90334302 0.89898256]
|
|
|
|
mean value: 0.9059052854122621
|
|
|
|
key: test_fscore
|
|
value: [0.84810127 0.87898089 0.88461538 0.89440994 0.84662577 0.93589744
|
|
0.86904762 0.87898089 0.88311688 0.94594595]
|
|
|
|
mean value: 0.8865722022644907
|
|
|
|
key: train_fscore
|
|
value: [0.9106383 0.91024735 0.91257996 0.90819209 0.91583452 0.90212766
|
|
0.90753912 0.90819209 0.90600707 0.90162774]
|
|
|
|
mean value: 0.9082985895123753
|
|
|
|
key: test_precision
|
|
value: [0.81707317 0.85185185 0.8625 0.84705882 0.80232558 0.92405063
|
|
0.8021978 0.8625 0.87179487 0.97222222]
|
|
|
|
mean value: 0.8613574956634609
|
|
|
|
key: train_precision
|
|
value: [0.88919668 0.88583219 0.89290682 0.88324176 0.8979021 0.87966805
|
|
0.88734353 0.88203018 0.88170564 0.87862069]
|
|
|
|
mean value: 0.8858447624208464
|
|
|
|
key: test_recall
|
|
value: [0.88157895 0.90789474 0.90789474 0.94736842 0.8961039 0.94805195
|
|
0.94805195 0.8961039 0.89473684 0.92105263]
|
|
|
|
mean value: 0.9148838004101162
|
|
|
|
key: train_recall
|
|
value: [0.93313953 0.93604651 0.93313953 0.93459302 0.93449782 0.92576419
|
|
0.9286754 0.93595342 0.93168605 0.92587209]
|
|
|
|
mean value: 0.9319367573880369
|
|
|
|
key: test_roc_auc
|
|
value: [0.84338688 0.87602529 0.8825188 0.88926863 0.83620984 0.93455229
|
|
0.85560492 0.87568353 0.88157895 0.94736842]
|
|
|
|
mean value: 0.8822197539302803
|
|
|
|
key: train_roc_auc
|
|
value: [0.9083456 0.90761569 0.91052901 0.90543334 0.91419658 0.89965535
|
|
0.90547142 0.90547671 0.90334302 0.89898256]
|
|
|
|
mean value: 0.9059049287431028
|
|
|
|
key: test_jcc
|
|
value: [0.73626374 0.78409091 0.79310345 0.80898876 0.73404255 0.87951807
|
|
0.76842105 0.78409091 0.79069767 0.8974359 ]
|
|
|
|
mean value: 0.7976653016733087
|
|
|
|
key: train_jcc
|
|
value: [0.8359375 0.83527886 0.83921569 0.83182406 0.84473684 0.82170543
|
|
0.83072917 0.83182406 0.82816537 0.82087629]
|
|
|
|
mean value: 0.8320293267556449
|
|
|
|
MCC on Blind test: 0.7
|
|
|
|
Accuracy on Blind test: 0.86
|
|
|
|
Model_name: Gaussian NB
|
|
Model func: GaussianNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'mcsm_ppi2_affinity',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=169)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01956511 0.01418805 0.01397657 0.01359725 0.01381993 0.01363397
|
|
0.01389384 0.01413369 0.0137682 0.0138104 ]
|
|
|
|
mean value: 0.014438700675964356
|
|
|
|
key: score_time
|
|
value: [0.01292872 0.01001048 0.00960207 0.0095396 0.00947714 0.00961542
|
|
0.0096786 0.00963068 0.00963664 0.00969338]
|
|
|
|
mean value: 0.009981274604797363
|
|
|
|
key: test_mcc
|
|
value: [0.3677409 0.51396635 0.56751545 0.51721946 0.36546459 0.48591377
|
|
0.5187823 0.68055504 0.59630115 0.565301 ]
|
|
|
|
mean value: 0.5178760023538125
|
|
|
|
key: train_mcc
|
|
value: [0.55932935 0.52818862 0.52414825 0.52818862 0.54387803 0.52531549
|
|
0.53127949 0.52107302 0.51346701 0.52688095]
|
|
|
|
mean value: 0.5301748829818576
|
|
|
|
key: test_accuracy
|
|
value: [0.67973856 0.75163399 0.77777778 0.75816993 0.67973856 0.73856209
|
|
0.75816993 0.83660131 0.79605263 0.77631579]
|
|
|
|
mean value: 0.7552760577915376
|
|
|
|
key: train_accuracy
|
|
value: [0.77672727 0.76072727 0.75854545 0.76072727 0.768 0.76
|
|
0.76218182 0.75636364 0.75218023 0.76017442]
|
|
|
|
mean value: 0.7615627378435518
|
|
|
|
key: test_fscore
|
|
value: [0.63703704 0.72058824 0.75 0.74829932 0.65248227 0.71428571
|
|
0.74829932 0.82517483 0.78321678 0.75 ]
|
|
|
|
mean value: 0.7329383503967806
|
|
|
|
key: train_fscore
|
|
value: [0.7595928 0.74033149 0.73734177 0.74033149 0.74622116 0.74137931
|
|
0.74109264 0.73221423 0.7265437 0.73974763]
|
|
|
|
mean value: 0.7404796227190307
|
|
|
|
key: test_precision
|
|
value: [0.72881356 0.81666667 0.85 0.77464789 0.71875 0.79365079
|
|
0.78571429 0.89393939 0.8358209 0.85 ]
|
|
|
|
mean value: 0.8048003482139505
|
|
|
|
key: train_precision
|
|
value: [0.82342954 0.81001727 0.80902778 0.81001727 0.82280702 0.80305603
|
|
0.8125 0.81205674 0.81037567 0.80862069]
|
|
|
|
mean value: 0.8121908004481195
|
|
|
|
key: test_recall
|
|
value: [0.56578947 0.64473684 0.67105263 0.72368421 0.5974026 0.64935065
|
|
0.71428571 0.76623377 0.73684211 0.67105263]
|
|
|
|
mean value: 0.6740430622009569
|
|
|
|
key: train_recall
|
|
value: [0.70494186 0.68168605 0.67732558 0.68168605 0.68267831 0.68850073
|
|
0.68122271 0.66666667 0.65843023 0.68168605]
|
|
|
|
mean value: 0.6804824227345045
|
|
|
|
key: test_roc_auc
|
|
value: [0.67899863 0.75093985 0.77708476 0.757946 0.68028025 0.73914901
|
|
0.75845865 0.83706425 0.79605263 0.77631579]
|
|
|
|
mean value: 0.755228981544771
|
|
|
|
key: train_roc_auc
|
|
value: [0.77677952 0.7607848 0.75860457 0.7607848 0.76793799 0.75994804
|
|
0.76212298 0.75629845 0.75218023 0.76017442]
|
|
|
|
mean value: 0.7615615796689347
|
|
|
|
key: test_jcc
|
|
value: [0.4673913 0.56321839 0.6 0.59782609 0.48421053 0.55555556
|
|
0.59782609 0.70238095 0.64367816 0.6 ]
|
|
|
|
mean value: 0.5812087064237305
|
|
|
|
key: train_jcc
|
|
value: [0.61237374 0.5877193 0.5839599 0.5877193 0.59517766 0.5890411
|
|
0.58867925 0.57755359 0.57052897 0.58698373]
|
|
|
|
mean value: 0.587973653062591
|
|
|
|
MCC on Blind test: 0.35
|
|
|
|
Accuracy on Blind test: 0.73
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'mcsm_ppi2_affinity',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=169)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01435924 0.02336216 0.01930594 0.02446747 0.02200484 0.02586055
|
|
0.02032566 0.01927614 0.01912069 0.01915669]
|
|
|
|
mean value: 0.020723938941955566
|
|
|
|
key: score_time
|
|
value: [0.01252079 0.01283598 0.01281166 0.01281357 0.01296806 0.0128727
|
|
0.01315355 0.01288533 0.01286221 0.01288342]
|
|
|
|
mean value: 0.012860727310180665
|
|
|
|
key: test_mcc
|
|
value: [0.40045471 0.54773698 0.67583199 0.63496441 0.48008797 0.55890069
|
|
0.58168148 0.63496441 0.63157895 0.64748914]
|
|
|
|
mean value: 0.5793690728964541
|
|
|
|
key: train_mcc
|
|
value: [0.60282448 0.57928222 0.57928222 0.57429687 0.6059473 0.57957464
|
|
0.59009663 0.59090353 0.58472022 0.57535994]
|
|
|
|
mean value: 0.5862288043130421
|
|
|
|
key: test_accuracy
|
|
value: [0.69934641 0.77124183 0.83660131 0.81699346 0.73856209 0.77777778
|
|
0.79084967 0.81699346 0.81578947 0.82236842]
|
|
|
|
mean value: 0.7886523907808738
|
|
|
|
key: train_accuracy
|
|
value: [0.80072727 0.78909091 0.78909091 0.78618182 0.80218182 0.78909091
|
|
0.79418182 0.79490909 0.79142442 0.78706395]
|
|
|
|
mean value: 0.7923942917547568
|
|
|
|
key: test_fscore
|
|
value: [0.68055556 0.75177305 0.82758621 0.82051282 0.7260274 0.76712329
|
|
0.79220779 0.81333333 0.81578947 0.8137931 ]
|
|
|
|
mean value: 0.7808702020215437
|
|
|
|
key: train_fscore
|
|
value: [0.79398496 0.7826087 0.7826087 0.77727273 0.79456193 0.78129713
|
|
0.78576836 0.78828829 0.78274035 0.77986476]
|
|
|
|
mean value: 0.7848995905906386
|
|
|
|
key: test_precision
|
|
value: [0.72058824 0.81538462 0.86956522 0.8 0.76811594 0.8115942
|
|
0.79220779 0.83561644 0.81578947 0.85507246]
|
|
|
|
mean value: 0.8083934381013856
|
|
|
|
key: train_precision
|
|
value: [0.82242991 0.80804954 0.80804954 0.81170886 0.82574568 0.81064163
|
|
0.81861199 0.81395349 0.81674566 0.80715397]
|
|
|
|
mean value: 0.8143090246087948
|
|
|
|
key: test_recall
|
|
value: [0.64473684 0.69736842 0.78947368 0.84210526 0.68831169 0.72727273
|
|
0.79220779 0.79220779 0.81578947 0.77631579]
|
|
|
|
mean value: 0.756578947368421
|
|
|
|
key: train_recall
|
|
value: [0.76744186 0.75872093 0.75872093 0.74563953 0.76564774 0.75400291
|
|
0.75545852 0.76419214 0.75145349 0.75436047]
|
|
|
|
mean value: 0.7575638519345994
|
|
|
|
key: test_roc_auc
|
|
value: [0.6989918 0.77076213 0.83629528 0.81715653 0.73889269 0.77811005
|
|
0.79084074 0.81715653 0.81578947 0.82236842]
|
|
|
|
mean value: 0.7886363636363636
|
|
|
|
key: train_roc_auc
|
|
value: [0.8007515 0.78911301 0.78911301 0.78621132 0.80215527 0.78906541
|
|
0.79415368 0.79488677 0.79142442 0.78706395]
|
|
|
|
mean value: 0.7923938339934329
|
|
|
|
key: test_jcc
|
|
value: [0.51578947 0.60227273 0.70588235 0.69565217 0.56989247 0.62222222
|
|
0.65591398 0.68539326 0.68888889 0.68604651]
|
|
|
|
mean value: 0.6427954060590045
|
|
|
|
key: train_jcc
|
|
value: [0.65835411 0.64285714 0.64285714 0.63568773 0.65914787 0.64108911
|
|
0.64713217 0.65055762 0.64303483 0.63916256]
|
|
|
|
mean value: 0.6459880289195492
|
|
|
|
MCC on Blind test: 0.47
|
|
|
|
Accuracy on Blind test: 0.79
|
|
|
|
Model_name: K-Nearest Neighbors
|
|
Model func: KNeighborsClassifier()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'mcsm_ppi2_affinity',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=169)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', KNeighborsClassifier())])
|
|
|
|
key: fit_time
|
|
value: [0.017838 0.01399088 0.01374364 0.01474118 0.01386452 0.01430583
|
|
0.01341105 0.01312232 0.01267409 0.0128305 ]
|
|
|
|
mean value: 0.014052200317382812
|
|
|
|
key: score_time
|
|
value: [0.0403204 0.01716399 0.01853919 0.02205014 0.01906872 0.01816201
|
|
0.01837444 0.01736879 0.01735902 0.01744246]
|
|
|
|
mean value: 0.020584917068481444
|
|
|
|
key: test_mcc
|
|
value: [0.50635887 0.64763216 0.6485802 0.61341604 0.5519122 0.67362334
|
|
0.66090477 0.61091267 0.58630197 0.75006493]
|
|
|
|
mean value: 0.624970715963236
|
|
|
|
key: train_mcc
|
|
value: [0.77467726 0.75832498 0.75969198 0.76048109 0.8000725 0.75398264
|
|
0.76711236 0.77742712 0.7645408 0.75391236]
|
|
|
|
mean value: 0.7670223099930393
|
|
|
|
key: test_accuracy
|
|
value: [0.75163399 0.82352941 0.82352941 0.80392157 0.76470588 0.83660131
|
|
0.83006536 0.80392157 0.78947368 0.875 ]
|
|
|
|
mean value: 0.8102382180942552
|
|
|
|
key: train_accuracy
|
|
value: [0.88581818 0.87781818 0.87854545 0.87854545 0.89818182 0.87490909
|
|
0.88218182 0.88727273 0.88081395 0.875 ]
|
|
|
|
mean value: 0.8819086680761099
|
|
|
|
key: test_fscore
|
|
value: [0.7625 0.82580645 0.82802548 0.81481481 0.79545455 0.84076433
|
|
0.83544304 0.81481481 0.80487805 0.87417219]
|
|
|
|
mean value: 0.8196673707799911
|
|
|
|
key: train_fscore
|
|
value: [0.89074461 0.88284519 0.88346127 0.88410826 0.90277778 0.88105118
|
|
0.88687151 0.89183531 0.88579387 0.88105118]
|
|
|
|
mean value: 0.8870540143196411
|
|
|
|
key: test_precision
|
|
value: [0.72619048 0.81012658 0.80246914 0.76744186 0.70707071 0.825
|
|
0.81481481 0.77647059 0.75 0.88 ]
|
|
|
|
mean value: 0.7859584164857358
|
|
|
|
key: train_precision
|
|
value: [0.85447263 0.84852547 0.84966443 0.84594954 0.86321381 0.83926219
|
|
0.85234899 0.85656836 0.85026738 0.84036939]
|
|
|
|
mean value: 0.8500642193293324
|
|
|
|
key: test_recall
|
|
value: [0.80263158 0.84210526 0.85526316 0.86842105 0.90909091 0.85714286
|
|
0.85714286 0.85714286 0.86842105 0.86842105]
|
|
|
|
mean value: 0.8585782638414218
|
|
|
|
key: train_recall
|
|
value: [0.93023256 0.92005814 0.92005814 0.92587209 0.94614265 0.9272198
|
|
0.92430859 0.930131 0.9244186 0.92587209]
|
|
|
|
mean value: 0.9274313665752683
|
|
|
|
key: test_roc_auc
|
|
value: [0.75196514 0.82365003 0.82373548 0.8043404 0.76375598 0.83646617
|
|
0.82988722 0.80357143 0.78947368 0.875 ]
|
|
|
|
mean value: 0.8101845522898155
|
|
|
|
key: train_roc_auc
|
|
value: [0.88578586 0.87778744 0.87851524 0.87851101 0.89821667 0.87494711
|
|
0.88221243 0.88730387 0.88081395 0.875 ]
|
|
|
|
mean value: 0.8819093590264379
|
|
|
|
key: test_jcc
|
|
value: [0.61616162 0.7032967 0.70652174 0.6875 0.66037736 0.72527473
|
|
0.7173913 0.6875 0.67346939 0.77647059]
|
|
|
|
mean value: 0.6953963422692268
|
|
|
|
key: train_jcc
|
|
value: [0.80301129 0.79026217 0.79125 0.79228856 0.82278481 0.78739184
|
|
0.79673777 0.80478589 0.795 0.78739184]
|
|
|
|
mean value: 0.7970904176362799
|
|
|
|
MCC on Blind test: 0.39
|
|
|
|
Accuracy on Blind test: 0.72
|
|
|
|
Model_name: SVM
|
|
Model func: SVC(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'mcsm_ppi2_affinity',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=169)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SVC(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.08229041 0.08397126 0.08443499 0.0849092 0.08254147 0.08595228
|
|
0.08338308 0.09927678 0.09544754 0.10268021]
|
|
|
|
mean value: 0.08848872184753417
|
|
|
|
key: score_time
|
|
value: [0.02667451 0.0271678 0.0274229 0.02746725 0.02686 0.02777171
|
|
0.02696133 0.02928233 0.02864861 0.03111315]
|
|
|
|
mean value: 0.0279369592666626
|
|
|
|
key: test_mcc
|
|
value: [0.58939196 0.73075678 0.753339 0.75152581 0.62965676 0.84423266
|
|
0.67107077 0.70070473 0.71675803 0.82901914]
|
|
|
|
mean value: 0.721645563151096
|
|
|
|
key: train_mcc
|
|
value: [0.77357629 0.76824023 0.75492621 0.77361577 0.77896577 0.75371122
|
|
0.76190353 0.75928355 0.75486693 0.73622485]
|
|
|
|
mean value: 0.761531433700545
|
|
|
|
key: test_accuracy
|
|
value: [0.79084967 0.8627451 0.87581699 0.86928105 0.81045752 0.92156863
|
|
0.83006536 0.8496732 0.85526316 0.91447368]
|
|
|
|
mean value: 0.8580194358445132
|
|
|
|
key: train_accuracy
|
|
value: [0.88436364 0.88145455 0.87490909 0.88363636 0.88727273 0.87345455
|
|
0.87854545 0.87636364 0.875 0.86555233]
|
|
|
|
mean value: 0.8780552325581396
|
|
|
|
key: test_fscore
|
|
value: [0.80487805 0.86956522 0.87898089 0.87951807 0.82634731 0.92405063
|
|
0.8452381 0.85534591 0.86419753 0.91390728]
|
|
|
|
mean value: 0.8662028991301498
|
|
|
|
key: train_fscore
|
|
value: [0.89057123 0.88812629 0.88186813 0.89071038 0.89288182 0.88130969
|
|
0.88490696 0.88387978 0.88170564 0.87302677]
|
|
|
|
mean value: 0.8848986692974512
|
|
|
|
key: test_precision
|
|
value: [0.75 0.82352941 0.85185185 0.81111111 0.76666667 0.90123457
|
|
0.78021978 0.82926829 0.81395349 0.92 ]
|
|
|
|
mean value: 0.824783517057037
|
|
|
|
key: train_precision
|
|
value: [0.84575163 0.84135241 0.8359375 0.84020619 0.85 0.82926829
|
|
0.84031414 0.83268983 0.83681462 0.82704811]
|
|
|
|
mean value: 0.837938272261832
|
|
|
|
key: test_recall
|
|
value: [0.86842105 0.92105263 0.90789474 0.96052632 0.8961039 0.94805195
|
|
0.92207792 0.88311688 0.92105263 0.90789474]
|
|
|
|
mean value: 0.9136192754613808
|
|
|
|
key: train_recall
|
|
value: [0.94040698 0.94040698 0.93313953 0.94767442 0.94032023 0.94032023
|
|
0.93449782 0.94177584 0.93168605 0.9244186 ]
|
|
|
|
mean value: 0.9374646677499069
|
|
|
|
key: test_roc_auc
|
|
value: [0.79135338 0.86312372 0.87602529 0.86987355 0.80989405 0.9213944
|
|
0.82946001 0.84945318 0.85526316 0.91447368]
|
|
|
|
mean value: 0.8580314422419686
|
|
|
|
key: train_roc_auc
|
|
value: [0.88432285 0.88141164 0.87486671 0.88358976 0.88731128 0.87350314
|
|
0.87858612 0.87641117 0.875 0.86555233]
|
|
|
|
mean value: 0.8780554991367929
|
|
|
|
key: test_jcc
|
|
value: [0.67346939 0.76923077 0.78409091 0.78494624 0.70408163 0.85882353
|
|
0.73195876 0.74725275 0.76086957 0.84146341]
|
|
|
|
mean value: 0.7656186954691628
|
|
|
|
key: train_jcc
|
|
value: [0.80272953 0.79876543 0.78869779 0.80295567 0.80649189 0.78780488
|
|
0.79357231 0.79192166 0.78843788 0.77466504]
|
|
|
|
mean value: 0.7936042080681655
|
|
|
|
MCC on Blind test: 0.7
|
|
|
|
Accuracy on Blind test: 0.86
|
|
|
|
Model_name: MLP
|
|
Model func: MLPClassifier(max_iter=500, random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'mcsm_ppi2_affinity',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=169)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MLPClassifier(max_iter=500, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [2.54043746 6.50655675 3.90172577 5.35549545 5.01403522 3.69358182
|
|
5.10197377 5.09569311 3.6774075 3.76738024]
|
|
|
|
mean value: 4.465428709983826
|
|
|
|
key: score_time
|
|
value: [0.0129931 0.0150516 0.01331306 0.01484656 0.01529932 0.01318884
|
|
0.01307273 0.02309465 0.01365232 0.01310229]
|
|
|
|
mean value: 0.01476144790649414
|
|
|
|
key: test_mcc
|
|
value: [0.73075678 0.81960182 0.84344558 0.845814 0.7283738 0.91040218
|
|
0.69001885 0.77934127 0.83482162 0.97402153]
|
|
|
|
mean value: 0.8156597423699706
|
|
|
|
key: train_mcc
|
|
value: [0.86679607 0.97529838 0.94188241 0.9652375 0.97383454 0.93667175
|
|
0.94962366 0.95229201 0.9440412 0.94393734]
|
|
|
|
mean value: 0.9449614852658058
|
|
|
|
key: test_accuracy
|
|
value: [0.8627451 0.90849673 0.92156863 0.92156863 0.8627451 0.95424837
|
|
0.83660131 0.88888889 0.91447368 0.98684211]
|
|
|
|
mean value: 0.9058178534571724
|
|
|
|
key: train_accuracy
|
|
value: [0.93163636 0.98763636 0.97090909 0.98254545 0.98690909 0.968
|
|
0.97454545 0.976 0.97165698 0.97165698]
|
|
|
|
mean value: 0.972149577167019
|
|
|
|
key: test_fscore
|
|
value: [0.86956522 0.91139241 0.92207792 0.92405063 0.86956522 0.95597484
|
|
0.85380117 0.89308176 0.91925466 0.98701299]
|
|
|
|
mean value: 0.9105776813597523
|
|
|
|
key: train_fscore
|
|
value: [0.93463143 0.98769008 0.97076023 0.98270893 0.98686131 0.96857143
|
|
0.97494631 0.97627606 0.97220242 0.97216274]
|
|
|
|
mean value: 0.9726810960468409
|
|
|
|
key: test_precision
|
|
value: [0.82352941 0.87804878 0.91025641 0.8902439 0.83333333 0.92682927
|
|
0.77659574 0.86585366 0.87058824 0.97435897]
|
|
|
|
mean value: 0.874963771944449
|
|
|
|
key: train_precision
|
|
value: [0.896 0.98412698 0.97647059 0.97428571 0.9897511 0.95091164
|
|
0.95915493 0.96448864 0.95384615 0.95511921]
|
|
|
|
mean value: 0.9604154960071852
|
|
|
|
key: test_recall
|
|
value: [0.92105263 0.94736842 0.93421053 0.96052632 0.90909091 0.98701299
|
|
0.94805195 0.92207792 0.97368421 1. ]
|
|
|
|
mean value: 0.9503075871496924
|
|
|
|
key: train_recall
|
|
value: [0.97674419 0.99127907 0.96511628 0.99127907 0.98398836 0.98689956
|
|
0.99126638 0.98835517 0.99127907 0.98982558]
|
|
|
|
mean value: 0.9856032717240445
|
|
|
|
key: test_roc_auc
|
|
value: [0.86312372 0.90874915 0.92165072 0.9218216 0.86244019 0.95403281
|
|
0.83586808 0.88867054 0.91447368 0.98684211]
|
|
|
|
mean value: 0.9057672590567327
|
|
|
|
key: train_roc_auc
|
|
value: [0.93160353 0.98763371 0.97091331 0.9825391 0.98690697 0.96801374
|
|
0.97455761 0.97600898 0.97165698 0.97165698]
|
|
|
|
mean value: 0.9721490894011713
|
|
|
|
key: test_jcc
|
|
value: [0.76923077 0.8372093 0.85542169 0.85882353 0.76923077 0.91566265
|
|
0.74489796 0.80681818 0.85057471 0.97435897]
|
|
|
|
mean value: 0.838222853555279
|
|
|
|
key: train_jcc
|
|
value: [0.8772846 0.97567954 0.94318182 0.96600567 0.9740634 0.93905817
|
|
0.95111732 0.95365169 0.94590846 0.94583333]
|
|
|
|
mean value: 0.9471783991363041
|
|
|
|
MCC on Blind test: 0.49
|
|
|
|
Accuracy on Blind test: 0.81
|
|
|
|
Model_name: Decision Tree
|
|
Model func: DecisionTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'mcsm_ppi2_affinity',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=169)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', DecisionTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.08296514 0.06868815 0.06392264 0.06326723 0.07574511 0.06084299
|
|
0.05777049 0.06035256 0.06168103 0.06090355]
|
|
|
|
mean value: 0.06561388969421386
|
|
|
|
key: score_time
|
|
value: [0.01025057 0.00945067 0.00942612 0.00938892 0.00951672 0.00942349
|
|
0.00942707 0.00941324 0.00944281 0.00937414]
|
|
|
|
mean value: 0.009511375427246093
|
|
|
|
key: test_mcc
|
|
value: [0.89574433 0.90921537 0.92445054 0.88243336 0.80939231 0.93661437
|
|
0.88599925 0.86208891 0.8732726 0.96060947]
|
|
|
|
mean value: 0.8939820513970249
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.94771242 0.95424837 0.96078431 0.94117647 0.90196078 0.96732026
|
|
0.94117647 0.92810458 0.93421053 0.98026316]
|
|
|
|
mean value: 0.9456957344341246
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.94805195 0.95483871 0.96202532 0.94117647 0.90797546 0.96855346
|
|
0.94409938 0.93251534 0.9375 0.98039216]
|
|
|
|
mean value: 0.9477128237183541
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.93589744 0.93670886 0.92682927 0.93506494 0.86046512 0.93902439
|
|
0.9047619 0.88372093 0.89285714 0.97402597]
|
|
|
|
mean value: 0.91893559584151
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.96052632 0.97368421 1. 0.94736842 0.96103896 1.
|
|
0.98701299 0.98701299 0.98684211 0.98684211]
|
|
|
|
mean value: 0.9790328092959671
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.94779563 0.95437457 0.96103896 0.94121668 0.90157211 0.96710526
|
|
0.94087491 0.92771702 0.93421053 0.98026316]
|
|
|
|
mean value: 0.9456168831168831
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.90123457 0.91358025 0.92682927 0.88888889 0.83146067 0.93902439
|
|
0.89411765 0.87356322 0.88235294 0.96153846]
|
|
|
|
mean value: 0.9012590304562152
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.56
|
|
|
|
Accuracy on Blind test: 0.83
|
|
|
|
Model_name: Extra Trees
|
|
Model func: ExtraTreesClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'mcsm_ppi2_affinity',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=169)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreesClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.22900677 0.21895313 0.22144055 0.21893716 0.21777987 0.21396589
|
|
0.21362853 0.21586347 0.21626425 0.21820951]
|
|
|
|
mean value: 0.2184049129486084
|
|
|
|
key: score_time
|
|
value: [0.02043247 0.02074599 0.02107549 0.02020907 0.02011371 0.02014065
|
|
0.019876 0.02024102 0.02034402 0.02013922]
|
|
|
|
mean value: 0.020331764221191408
|
|
|
|
key: test_mcc
|
|
value: [0.80741391 0.92285372 0.93537369 0.8580978 0.8270585 0.92280176
|
|
0.87189727 0.92186711 0.85712009 0.98692754]
|
|
|
|
mean value: 0.8911411385753999
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.90196078 0.96078431 0.96732026 0.92810458 0.90849673 0.96078431
|
|
0.93464052 0.96078431 0.92763158 0.99342105]
|
|
|
|
mean value: 0.9443928448572412
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.90566038 0.96153846 0.96774194 0.92993631 0.91566265 0.96202532
|
|
0.9375 0.96153846 0.92993631 0.99346405]
|
|
|
|
mean value: 0.946500386672994
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.86746988 0.9375 0.94936709 0.90123457 0.85393258 0.9382716
|
|
0.90361446 0.94936709 0.90123457 0.98701299]
|
|
|
|
mean value: 0.9189004826587978
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.94736842 0.98684211 0.98684211 0.96052632 0.98701299 0.98701299
|
|
0.97402597 0.97402597 0.96052632 1. ]
|
|
|
|
mean value: 0.9764183185235816
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.90225564 0.96095352 0.96744703 0.92831511 0.90798018 0.96061176
|
|
0.93438141 0.9606972 0.92763158 0.99342105]
|
|
|
|
mean value: 0.9443694463431306
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.82758621 0.92592593 0.9375 0.86904762 0.84444444 0.92682927
|
|
0.88235294 0.92592593 0.86904762 0.98701299]
|
|
|
|
mean value: 0.8995672937770227
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.44
|
|
|
|
Accuracy on Blind test: 0.79
|
|
|
|
Model_name: Extra Tree
|
|
Model func: ExtraTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'mcsm_ppi2_affinity',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=169)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01479363 0.01480007 0.01652789 0.01446533 0.01471949 0.01451969
|
|
0.01533723 0.01431894 0.01434517 0.01419091]
|
|
|
|
mean value: 0.014801836013793946
|
|
|
|
key: score_time
|
|
value: [0.00957131 0.00958157 0.01046491 0.00946736 0.00996709 0.00950789
|
|
0.00931287 0.0093627 0.0093174 0.00931764]
|
|
|
|
mean value: 0.00958707332611084
|
|
|
|
key: test_mcc
|
|
value: [0.79785416 0.80095083 0.80464821 0.83365315 0.78171185 0.81941879
|
|
0.70565331 0.78091562 0.77962978 0.88782616]
|
|
|
|
mean value: 0.7992261869848796
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.89542484 0.89542484 0.89542484 0.91503268 0.88235294 0.90849673
|
|
0.84313725 0.88888889 0.88815789 0.94078947]
|
|
|
|
mean value: 0.8953130374957
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.90123457 0.90243902 0.90361446 0.91823899 0.89411765 0.9125
|
|
0.86046512 0.89440994 0.89308176 0.94409938]
|
|
|
|
mean value: 0.9024200884947865
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.84883721 0.84090909 0.83333333 0.87951807 0.8172043 0.87951807
|
|
0.77894737 0.85714286 0.85542169 0.89411765]
|
|
|
|
mean value: 0.8484949638568053
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.96052632 0.97368421 0.98684211 0.96052632 0.98701299 0.94805195
|
|
0.96103896 0.93506494 0.93421053 1. ]
|
|
|
|
mean value: 0.9646958304853042
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.89584757 0.89593301 0.89601846 0.91532809 0.88166439 0.9082365
|
|
0.84236159 0.8885851 0.88815789 0.94078947]
|
|
|
|
mean value: 0.8952922077922078
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.82022472 0.82222222 0.82417582 0.84883721 0.80851064 0.83908046
|
|
0.75510204 0.80898876 0.80681818 0.89411765]
|
|
|
|
mean value: 0.8228077706607758
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.5
|
|
|
|
Accuracy on Blind test: 0.79
|
|
|
|
Model_name: Random Forest
|
|
Model func: RandomForestClassifier(n_estimators=1000, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'mcsm_ppi2_affinity',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=169)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(n_estimators=1000, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [3.70653701 3.70080662 3.66520667 3.7132864 3.64379811 3.62841678
|
|
3.61334991 3.66238761 3.63429379 3.64054394]
|
|
|
|
mean value: 3.660862684249878
|
|
|
|
key: score_time
|
|
value: [0.11106348 0.10668039 0.10777426 0.11340523 0.1120317 0.10480928
|
|
0.11366653 0.10516834 0.15529108 0.10438061]
|
|
|
|
mean value: 0.11342709064483643
|
|
|
|
key: test_mcc
|
|
value: [0.85989239 0.93537369 0.92285372 0.90857826 0.83863043 0.97418375
|
|
0.88599925 0.98701078 0.88349301 0.98692754]
|
|
|
|
mean value: 0.9182942832953893
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.92810458 0.96732026 0.96078431 0.95424837 0.91503268 0.9869281
|
|
0.94117647 0.99346405 0.94078947 0.99342105]
|
|
|
|
mean value: 0.9581269349845202
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.93081761 0.96774194 0.96153846 0.95424837 0.92121212 0.98717949
|
|
0.94409938 0.99354839 0.94267516 0.99346405]
|
|
|
|
mean value: 0.9596524958991918
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.89156627 0.94936709 0.9375 0.94805195 0.86363636 0.97468354
|
|
0.9047619 0.98717949 0.91358025 0.98701299]
|
|
|
|
mean value: 0.9357339835527905
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.97368421 0.98684211 0.98684211 0.96052632 0.98701299 1.
|
|
0.98701299 1. 0.97368421 1. ]
|
|
|
|
mean value: 0.9855604921394395
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.92840055 0.96744703 0.96095352 0.95428913 0.91455913 0.98684211
|
|
0.94087491 0.99342105 0.94078947 0.99342105]
|
|
|
|
mean value: 0.9580997949419002
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.87058824 0.9375 0.92592593 0.9125 0.85393258 0.97468354
|
|
0.89411765 0.98717949 0.89156627 0.98701299]
|
|
|
|
mean value: 0.9235006676105043
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.67
|
|
|
|
Accuracy on Blind test: 0.86
|
|
|
|
Model_name: Random Forest2
|
|
Model func: RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'mcsm_ppi2_affinity',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC0...05', 'ZHAC000106'],
|
|
dtype='object', length=169)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10,
|
|
oob_score=True, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.30626822 1.34551883 1.34068537 1.35205746 1.3200345 1.374686
|
|
1.32856345 1.36582446 1.33214092 1.3070631 ]
|
|
|
|
mean value: 1.337284231185913
|
|
|
|
key: score_time
|
|
value: [0.31698656 0.29535389 0.29648471 0.28944588 0.27018976 0.30676222
|
|
0.2084434 0.2836206 0.27641726 0.23810649]
|
|
|
|
mean value: 0.2781810760498047
|
|
|
|
key: test_mcc
|
|
value: [0.85989239 0.89542037 0.86959495 0.8842875 0.78091562 0.96151265
|
|
0.87398511 0.93471203 0.88349301 0.97368421]
|
|
|
|
mean value: 0.8917497837702275
|
|
|
|
key: train_mcc
|
|
value: [0.94065016 0.94207233 0.93328341 0.94530204 0.94647215 0.93930788
|
|
0.94803867 0.94796228 0.94225868 0.94650992]
|
|
|
|
mean value: 0.943185752025907
|
|
|
|
key: test_accuracy
|
|
value: [0.92810458 0.94771242 0.93464052 0.94117647 0.88888889 0.98039216
|
|
0.93464052 0.96732026 0.94078947 0.98684211]
|
|
|
|
mean value: 0.9450507395940833
|
|
|
|
key: train_accuracy
|
|
value: [0.97018182 0.97090909 0.96654545 0.97236364 0.97309091 0.96945455
|
|
0.97381818 0.97381818 0.97093023 0.97311047]
|
|
|
|
mean value: 0.9714222515856237
|
|
|
|
key: test_fscore
|
|
value: [0.93081761 0.94736842 0.93506494 0.94267516 0.89440994 0.98089172
|
|
0.9382716 0.96774194 0.94267516 0.98684211]
|
|
|
|
mean value: 0.946675858797052
|
|
|
|
key: train_fscore
|
|
value: [0.97056712 0.97126437 0.96690647 0.97285714 0.97340043 0.96987088
|
|
0.97417504 0.97413793 0.9713467 0.97343862]
|
|
|
|
mean value: 0.9717964706791342
|
|
|
|
key: test_precision
|
|
value: [0.89156627 0.94736842 0.92307692 0.91358025 0.85714286 0.9625
|
|
0.89411765 0.96153846 0.91358025 0.98684211]
|
|
|
|
mean value: 0.9251313174020256
|
|
|
|
key: train_precision
|
|
value: [0.95886525 0.96022727 0.95726496 0.95646067 0.96164773 0.95615276
|
|
0.96039604 0.96170213 0.95762712 0.96170213]
|
|
|
|
mean value: 0.9592046051349344
|
|
|
|
key: test_recall
|
|
value: [0.97368421 0.94736842 0.94736842 0.97368421 0.93506494 1.
|
|
0.98701299 0.97402597 0.97368421 0.98684211]
|
|
|
|
mean value: 0.9698735475051264
|
|
|
|
key: train_recall
|
|
value: [0.98255814 0.98255814 0.97674419 0.98982558 0.98544396 0.98398836
|
|
0.98835517 0.98689956 0.98546512 0.98546512]
|
|
|
|
mean value: 0.9847303324193494
|
|
|
|
key: test_roc_auc
|
|
value: [0.92840055 0.94771018 0.93472317 0.94138756 0.8885851 0.98026316
|
|
0.93429597 0.96727614 0.94078947 0.98684211]
|
|
|
|
mean value: 0.9450273410799727
|
|
|
|
key: train_roc_auc
|
|
value: [0.97017281 0.97090061 0.96653803 0.97235093 0.97309989 0.96946511
|
|
0.97382875 0.97382769 0.97093023 0.97311047]
|
|
|
|
mean value: 0.9714224510003047
|
|
|
|
key: test_jcc
|
|
value: [0.87058824 0.9 0.87804878 0.89156627 0.80898876 0.9625
|
|
0.88372093 0.9375 0.89156627 0.97402597]
|
|
|
|
mean value: 0.8998505214205881
|
|
|
|
key: train_jcc
|
|
value: [0.94281729 0.94413408 0.93593315 0.94714882 0.94817927 0.94150418
|
|
0.94965035 0.94957983 0.94428969 0.94825175]
|
|
|
|
mean value: 0.9451488411338685
|
|
|
|
MCC on Blind test: 0.66
|
|
|
|
Accuracy on Blind test: 0.85
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'mcsm_ppi2_affinity',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=169)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.03018689 0.01959658 0.01925421 0.01920462 0.01933217 0.01940989
|
|
0.01946998 0.01902485 0.01904225 0.01904655]
|
|
|
|
mean value: 0.02035679817199707
|
|
|
|
key: score_time
|
|
value: [0.01297235 0.01271534 0.01296091 0.01273918 0.01278257 0.01274681
|
|
0.01279449 0.01274943 0.01269579 0.01272798]
|
|
|
|
mean value: 0.012788486480712891
|
|
|
|
key: test_mcc
|
|
value: [0.40045471 0.54773698 0.67583199 0.63496441 0.48008797 0.55890069
|
|
0.58168148 0.63496441 0.63157895 0.64748914]
|
|
|
|
mean value: 0.5793690728964541
|
|
|
|
key: train_mcc
|
|
value: [0.60282448 0.57928222 0.57928222 0.57429687 0.6059473 0.57957464
|
|
0.59009663 0.59090353 0.58472022 0.57535994]
|
|
|
|
mean value: 0.5862288043130421
|
|
|
|
key: test_accuracy
|
|
value: [0.69934641 0.77124183 0.83660131 0.81699346 0.73856209 0.77777778
|
|
0.79084967 0.81699346 0.81578947 0.82236842]
|
|
|
|
mean value: 0.7886523907808738
|
|
|
|
key: train_accuracy
|
|
value: [0.80072727 0.78909091 0.78909091 0.78618182 0.80218182 0.78909091
|
|
0.79418182 0.79490909 0.79142442 0.78706395]
|
|
|
|
mean value: 0.7923942917547568
|
|
|
|
key: test_fscore
|
|
value: [0.68055556 0.75177305 0.82758621 0.82051282 0.7260274 0.76712329
|
|
0.79220779 0.81333333 0.81578947 0.8137931 ]
|
|
|
|
mean value: 0.7808702020215437
|
|
|
|
key: train_fscore
|
|
value: [0.79398496 0.7826087 0.7826087 0.77727273 0.79456193 0.78129713
|
|
0.78576836 0.78828829 0.78274035 0.77986476]
|
|
|
|
mean value: 0.7848995905906386
|
|
|
|
key: test_precision
|
|
value: [0.72058824 0.81538462 0.86956522 0.8 0.76811594 0.8115942
|
|
0.79220779 0.83561644 0.81578947 0.85507246]
|
|
|
|
mean value: 0.8083934381013856
|
|
|
|
key: train_precision
|
|
value: [0.82242991 0.80804954 0.80804954 0.81170886 0.82574568 0.81064163
|
|
0.81861199 0.81395349 0.81674566 0.80715397]
|
|
|
|
mean value: 0.8143090246087948
|
|
|
|
key: test_recall
|
|
value: [0.64473684 0.69736842 0.78947368 0.84210526 0.68831169 0.72727273
|
|
0.79220779 0.79220779 0.81578947 0.77631579]
|
|
|
|
mean value: 0.756578947368421
|
|
|
|
key: train_recall
|
|
value: [0.76744186 0.75872093 0.75872093 0.74563953 0.76564774 0.75400291
|
|
0.75545852 0.76419214 0.75145349 0.75436047]
|
|
|
|
mean value: 0.7575638519345994
|
|
|
|
key: test_roc_auc
|
|
value: [0.6989918 0.77076213 0.83629528 0.81715653 0.73889269 0.77811005
|
|
0.79084074 0.81715653 0.81578947 0.82236842]
|
|
|
|
mean value: 0.7886363636363636
|
|
|
|
key: train_roc_auc
|
|
value: [0.8007515 0.78911301 0.78911301 0.78621132 0.80215527 0.78906541
|
|
0.79415368 0.79488677 0.79142442 0.78706395]
|
|
|
|
mean value: 0.7923938339934329
|
|
|
|
key: test_jcc
|
|
value: [0.51578947 0.60227273 0.70588235 0.69565217 0.56989247 0.62222222
|
|
0.65591398 0.68539326 0.68888889 0.68604651]
|
|
|
|
mean value: 0.6427954060590045
|
|
|
|
key: train_jcc
|
|
value: [0.65835411 0.64285714 0.64285714 0.63568773 0.65914787 0.64108911
|
|
0.64713217 0.65055762 0.64303483 0.63916256]
|
|
|
|
mean value: 0.6459880289195492
|
|
|
|
MCC on Blind test: 0.47
|
|
|
|
Accuracy on Blind test: 0.79
|
|
|
|
Model_name: XGBoost
|
|
Model func: XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'mcsm_ppi2_affinity',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC0...
|
|
interaction_constraints=None, learning_rate=None,
|
|
max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan,
|
|
monotone_constraints=None, n_estimators=100,
|
|
n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None,
|
|
reg_lambda=None, scale_pos_weight=None,
|
|
subsample=None, tree_method=None,
|
|
use_label_encoder=False,
|
|
validate_parameters=None, verbosity=0))])
|
|
|
|
key: fit_time
|
|
value: [0.20779777 0.19287062 0.19394255 0.18926311 0.1842463 0.29130864
|
|
0.17176795 0.17428422 0.23474813 0.20230126]
|
|
|
|
mean value: 0.20425305366516114
|
|
|
|
key: score_time
|
|
value: [0.01165557 0.0116055 0.01186323 0.01150799 0.01161933 0.01168489
|
|
0.01168442 0.01156092 0.01153088 0.01255465]
|
|
|
|
mean value: 0.011726737022399902
|
|
|
|
key: test_mcc
|
|
value: [0.92285372 0.93537369 0.90042249 0.92156528 0.8766998 0.96151265
|
|
0.88599925 0.97418375 0.92233098 0.96060947]
|
|
|
|
mean value: 0.9261551092115724
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.96078431 0.96732026 0.94771242 0.96078431 0.93464052 0.98039216
|
|
0.94117647 0.9869281 0.96052632 0.98026316]
|
|
|
|
mean value: 0.9620528035775714
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.96153846 0.96774194 0.95 0.96052632 0.93902439 0.98089172
|
|
0.94409938 0.98717949 0.96153846 0.98013245]
|
|
|
|
mean value: 0.9632672600731994
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.9375 0.94936709 0.9047619 0.96052632 0.88505747 0.9625
|
|
0.9047619 0.97468354 0.9375 0.98666667]
|
|
|
|
mean value: 0.940332489615571
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.98684211 0.98684211 1. 0.96052632 1. 1.
|
|
0.98701299 1. 0.98684211 0.97368421]
|
|
|
|
mean value: 0.988174982911825
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.96095352 0.96744703 0.94805195 0.96078264 0.93421053 0.98026316
|
|
0.94087491 0.98684211 0.96052632 0.98026316]
|
|
|
|
mean value: 0.9620215311004785
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.92592593 0.9375 0.9047619 0.92405063 0.88505747 0.9625
|
|
0.89411765 0.97468354 0.92592593 0.96103896]
|
|
|
|
mean value: 0.9295562013191099
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.65
|
|
|
|
Accuracy on Blind test: 0.86
|
|
|
|
Model_name: LDA
|
|
Model func: LinearDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'mcsm_ppi2_affinity',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=169)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LinearDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.08196235 0.09456229 0.08306575 0.07113528 0.09208322 0.08681965
|
|
0.08439732 0.07232714 0.11516023 0.08218455]
|
|
|
|
mean value: 0.08636977672576904
|
|
|
|
key: score_time
|
|
value: [0.02983594 0.01275492 0.01286411 0.01279593 0.0127902 0.012923
|
|
0.01285291 0.01281023 0.02002501 0.01260686]
|
|
|
|
mean value: 0.015225911140441894
|
|
|
|
key: test_mcc
|
|
value: [0.68055504 0.73965143 0.77842376 0.67547951 0.69455593 0.87042236
|
|
0.65621796 0.73947387 0.7642171 0.88165527]
|
|
|
|
mean value: 0.7480652242338058
|
|
|
|
key: train_mcc
|
|
value: [0.80430138 0.79984191 0.7953851 0.79336115 0.81890521 0.79027456
|
|
0.78836516 0.7746894 0.79916334 0.80034342]
|
|
|
|
mean value: 0.796463062295693
|
|
|
|
key: test_accuracy
|
|
value: [0.83660131 0.86928105 0.88888889 0.83006536 0.84313725 0.93464052
|
|
0.82352941 0.86928105 0.88157895 0.94078947]
|
|
|
|
mean value: 0.8717793257653939
|
|
|
|
key: train_accuracy
|
|
value: [0.90109091 0.89890909 0.89672727 0.89527273 0.90836364 0.89381818
|
|
0.89309091 0.88654545 0.89825581 0.89898256]
|
|
|
|
mean value: 0.8971056553911205
|
|
|
|
key: test_fscore
|
|
value: [0.84662577 0.87179487 0.89032258 0.8452381 0.85542169 0.93670886
|
|
0.83832335 0.87341772 0.88461538 0.94039735]
|
|
|
|
mean value: 0.8782865672476938
|
|
|
|
key: train_fscore
|
|
value: [0.90462833 0.90245614 0.9002809 0.89958159 0.91151685 0.8979021
|
|
0.89684211 0.8899859 0.90223464 0.90272918]
|
|
|
|
mean value: 0.9008157731023853
|
|
|
|
key: test_precision
|
|
value: [0.79310345 0.85 0.87341772 0.77173913 0.79775281 0.91358025
|
|
0.77777778 0.85185185 0.8625 0.94666667]
|
|
|
|
mean value: 0.8438389652428273
|
|
|
|
key: train_precision
|
|
value: [0.87398374 0.8724559 0.87092391 0.86461126 0.88059701 0.8640646
|
|
0.86585366 0.86320109 0.86827957 0.87044534]
|
|
|
|
mean value: 0.8694416100077345
|
|
|
|
key: test_recall
|
|
value: [0.90789474 0.89473684 0.90789474 0.93421053 0.92207792 0.96103896
|
|
0.90909091 0.8961039 0.90789474 0.93421053]
|
|
|
|
mean value: 0.9175153793574846
|
|
|
|
key: train_recall
|
|
value: [0.9375 0.93459302 0.93168605 0.9375 0.94468705 0.93449782
|
|
0.930131 0.91848617 0.93895349 0.9375 ]
|
|
|
|
mean value: 0.9345534595985241
|
|
|
|
key: test_roc_auc
|
|
value: [0.83706425 0.86944634 0.8890123 0.83074163 0.84261791 0.93446685
|
|
0.82296651 0.86910458 0.88157895 0.94078947]
|
|
|
|
mean value: 0.8717788790157212
|
|
|
|
key: train_roc_auc
|
|
value: [0.90106441 0.89888312 0.89670183 0.89524199 0.90839003 0.89384775
|
|
0.89311783 0.88656867 0.89825581 0.89898256]
|
|
|
|
mean value: 0.8971054001218646
|
|
|
|
key: test_jcc
|
|
value: [0.73404255 0.77272727 0.80232558 0.73195876 0.74736842 0.88095238
|
|
0.72164948 0.7752809 0.79310345 0.8875 ]
|
|
|
|
mean value: 0.784690880389407
|
|
|
|
key: train_jcc
|
|
value: [0.82586428 0.82225064 0.81864623 0.81749049 0.83741935 0.81472081
|
|
0.8129771 0.80177891 0.82188295 0.82270408]
|
|
|
|
mean value: 0.8195734849477988
|
|
|
|
MCC on Blind test: 0.57
|
|
|
|
Accuracy on Blind test: 0.8
|
|
|
|
Model_name: Multinomial
|
|
Model func: MultinomialNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'mcsm_ppi2_affinity',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=169)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MultinomialNB())])
|
|
|
|
key: fit_time
|
|
value: [0.02136898 0.01806641 0.0181551 0.01815534 0.01805925 0.01814699
|
|
0.01815486 0.0180788 0.01778293 0.01779699]
|
|
|
|
mean value: 0.01837656497955322
|
|
|
|
key: score_time
|
|
value: [0.01268578 0.0124135 0.0124476 0.01247621 0.01248765 0.0124712
|
|
0.01245403 0.01243591 0.01241255 0.01240492]
|
|
|
|
mean value: 0.012468934059143066
|
|
|
|
key: test_mcc
|
|
value: [0.47710185 0.54590928 0.64745159 0.59531552 0.49042106 0.54254956
|
|
0.58233567 0.6601162 0.72525546 0.67819389]
|
|
|
|
mean value: 0.5944650061297241
|
|
|
|
key: train_mcc
|
|
value: [0.61604476 0.59423639 0.58991304 0.6044077 0.60595597 0.58690887
|
|
0.60736006 0.6117402 0.59475387 0.59304581]
|
|
|
|
mean value: 0.6004366647805309
|
|
|
|
key: test_accuracy
|
|
value: [0.73856209 0.77124183 0.82352941 0.79738562 0.74509804 0.77124183
|
|
0.79084967 0.83006536 0.86184211 0.83552632]
|
|
|
|
mean value: 0.7965342277261782
|
|
|
|
key: train_accuracy
|
|
value: [0.808 0.79709091 0.79490909 0.80218182 0.80290909 0.79345455
|
|
0.80363636 0.80581818 0.79723837 0.79651163]
|
|
|
|
mean value: 0.800175
|
|
|
|
key: test_fscore
|
|
value: [0.73684211 0.75524476 0.81879195 0.8 0.75159236 0.77124183
|
|
0.79746835 0.83116883 0.86624204 0.82269504]
|
|
|
|
mean value: 0.795128725284666
|
|
|
|
key: train_fscore
|
|
value: [0.80701754 0.79590344 0.79325513 0.80116959 0.80058867 0.79330422
|
|
0.80176211 0.80382072 0.79409594 0.79562044]
|
|
|
|
mean value: 0.7986537807555537
|
|
|
|
key: test_precision
|
|
value: [0.73684211 0.80597015 0.83561644 0.78481013 0.7375 0.77631579
|
|
0.77777778 0.83116883 0.83950617 0.89230769]
|
|
|
|
mean value: 0.8017815083022823
|
|
|
|
key: train_precision
|
|
value: [0.81176471 0.8011782 0.80029586 0.80588235 0.80952381 0.79330422
|
|
0.80888889 0.8115727 0.8065967 0.79912023]
|
|
|
|
mean value: 0.8048127676266289
|
|
|
|
key: test_recall
|
|
value: [0.73684211 0.71052632 0.80263158 0.81578947 0.76623377 0.76623377
|
|
0.81818182 0.83116883 0.89473684 0.76315789]
|
|
|
|
mean value: 0.7905502392344498
|
|
|
|
key: train_recall
|
|
value: [0.80232558 0.79069767 0.78633721 0.79651163 0.79184862 0.79330422
|
|
0.79475983 0.79621543 0.78197674 0.79215116]
|
|
|
|
mean value: 0.7926128093158661
|
|
|
|
key: test_roc_auc
|
|
value: [0.73855092 0.77084757 0.82339371 0.79750513 0.74495899 0.77127478
|
|
0.79066986 0.8300581 0.86184211 0.83552632]
|
|
|
|
mean value: 0.7964627477785373
|
|
|
|
key: train_roc_auc
|
|
value: [0.80800413 0.79709556 0.79491533 0.80218594 0.80290105 0.79345444
|
|
0.80362991 0.8058112 0.79723837 0.79651163]
|
|
|
|
mean value: 0.8001747571172269
|
|
|
|
key: test_jcc
|
|
value: [0.58333333 0.60674157 0.69318182 0.66666667 0.60204082 0.62765957
|
|
0.66315789 0.71111111 0.76404494 0.69879518]
|
|
|
|
mean value: 0.6616732912401211
|
|
|
|
key: train_jcc
|
|
value: [0.67647059 0.66099635 0.65735115 0.66829268 0.66748466 0.65741858
|
|
0.66911765 0.67199017 0.65850673 0.66060606]
|
|
|
|
mean value: 0.6648234631051324
|
|
|
|
MCC on Blind test: 0.5
|
|
|
|
Accuracy on Blind test: 0.8
|
|
|
|
Model_name: Passive Aggresive
|
|
Model func: PassiveAggressiveClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'mcsm_ppi2_affinity',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=169)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
PassiveAggressiveClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.04263258 0.04277706 0.04444718 0.03485823 0.03388524 0.02703953
|
|
0.05764151 0.03592515 0.04396796 0.02910829]
|
|
|
|
mean value: 0.039228272438049314
|
|
|
|
key: score_time
|
|
value: [0.01132154 0.01255369 0.01256275 0.01255107 0.01257277 0.01249695
|
|
0.01278162 0.0125742 0.01253915 0.01247621]
|
|
|
|
mean value: 0.012442994117736816
|
|
|
|
key: test_mcc
|
|
value: [0.60309555 0.65176514 0.66849513 0.67996128 0.57484001 0.72681931
|
|
0.63412557 0.47638993 0.52223297 0.6092718 ]
|
|
|
|
mean value: 0.6146996696580245
|
|
|
|
key: train_mcc
|
|
value: [0.76349418 0.7255349 0.70388678 0.67814636 0.72663735 0.60334417
|
|
0.75762983 0.47743729 0.54273565 0.53850164]
|
|
|
|
mean value: 0.6517348148160317
|
|
|
|
key: test_accuracy
|
|
value: [0.79738562 0.81699346 0.82352941 0.83660131 0.77777778 0.8496732
|
|
0.81699346 0.68627451 0.73684211 0.77631579]
|
|
|
|
mean value: 0.7918386652906777
|
|
|
|
key: train_accuracy
|
|
value: [0.87927273 0.856 0.83781818 0.83054545 0.85454545 0.77527273
|
|
0.87781818 0.69381818 0.73546512 0.73473837]
|
|
|
|
mean value: 0.8075294397463002
|
|
|
|
key: test_fscore
|
|
value: [0.77697842 0.79104478 0.84210526 0.82269504 0.8045977 0.86857143
|
|
0.82051282 0.76237624 0.66666667 0.81521739]
|
|
|
|
mean value: 0.7970765737832929
|
|
|
|
key: train_fscore
|
|
value: [0.87211094 0.84083601 0.85787126 0.80948487 0.8687664 0.81329305
|
|
0.87311178 0.7628169 0.64796905 0.78692353]
|
|
|
|
mean value: 0.8133183799255528
|
|
|
|
key: test_precision
|
|
value: [0.85714286 0.9137931 0.75789474 0.89230769 0.72164948 0.7755102
|
|
0.81012658 0.616 0.90909091 0.69444444]
|
|
|
|
mean value: 0.794796001417248
|
|
|
|
key: train_precision
|
|
value: [0.92786885 0.94064748 0.76390465 0.92523364 0.79091995 0.69524793
|
|
0.90737834 0.62224265 0.96820809 0.65756098]
|
|
|
|
mean value: 0.8199212570334181
|
|
|
|
key: test_recall
|
|
value: [0.71052632 0.69736842 0.94736842 0.76315789 0.90909091 0.98701299
|
|
0.83116883 1. 0.52631579 0.98684211]
|
|
|
|
mean value: 0.8358851674641148
|
|
|
|
key: train_recall
|
|
value: [0.82267442 0.76017442 0.97819767 0.71947674 0.9636099 0.97962154
|
|
0.84133916 0.98544396 0.4869186 0.97965116]
|
|
|
|
mean value: 0.8517107579296571
|
|
|
|
key: test_roc_auc
|
|
value: [0.7968216 0.81621668 0.82433356 0.8361244 0.77691388 0.84876965
|
|
0.81690021 0.68421053 0.73684211 0.77631579]
|
|
|
|
mean value: 0.7913448393711551
|
|
|
|
key: train_roc_auc
|
|
value: [0.87931392 0.85606974 0.83771601 0.83062629 0.85462472 0.77542124
|
|
0.87779167 0.69403012 0.73546512 0.73473837]
|
|
|
|
mean value: 0.8075797197115873
|
|
|
|
key: test_jcc
|
|
value: [0.63529412 0.65432099 0.72727273 0.69879518 0.67307692 0.76767677
|
|
0.69565217 0.616 0.5 0.68807339]
|
|
|
|
mean value: 0.6656162272459145
|
|
|
|
key: train_jcc
|
|
value: [0.77322404 0.72538141 0.75111607 0.67994505 0.76798144 0.68533605
|
|
0.77479893 0.61657559 0.47925608 0.64870067]
|
|
|
|
mean value: 0.6902315345624748
|
|
|
|
MCC on Blind test: 0.59
|
|
|
|
Accuracy on Blind test: 0.79
|
|
|
|
Model_name: Stochastic GDescent
|
|
Model func: SGDClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'mcsm_ppi2_affinity',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=169)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SGDClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.05076361 0.05958438 0.05333471 0.03971124 0.07210183 0.0543561
|
|
0.05373025 0.04330134 0.05012512 0.05790448]
|
|
|
|
mean value: 0.05349130630493164
|
|
|
|
key: score_time
|
|
value: [0.01090503 0.01258755 0.01262617 0.01249957 0.01194143 0.01539445
|
|
0.01372623 0.010741 0.01097751 0.01269054]
|
|
|
|
mean value: 0.012408947944641114
|
|
|
|
key: test_mcc
|
|
value: [0.59157909 0.7283738 0.732851 0.58391323 0.60990436 0.8319081
|
|
0.41249079 0.51809382 0.69791237 0.67213444]
|
|
|
|
mean value: 0.637916100617684
|
|
|
|
key: train_mcc
|
|
value: [0.65667291 0.803707 0.79287934 0.61561169 0.80001725 0.75223889
|
|
0.45809783 0.58851792 0.81355341 0.66660567]
|
|
|
|
mean value: 0.6947901916083767
|
|
|
|
key: test_accuracy
|
|
value: [0.77777778 0.8627451 0.8627451 0.76470588 0.80392157 0.91503268
|
|
0.64705882 0.7254902 0.84868421 0.81578947]
|
|
|
|
mean value: 0.8023950808393533
|
|
|
|
key: train_accuracy
|
|
value: [0.80945455 0.90181818 0.89454545 0.78254545 0.89672727 0.87563636
|
|
0.67781818 0.76654545 0.90625 0.81831395]
|
|
|
|
mean value: 0.8329654862579281
|
|
|
|
key: test_fscore
|
|
value: [0.80898876 0.85517241 0.85106383 0.80434783 0.7972973 0.91275168
|
|
0.74038462 0.78125 0.84563758 0.77777778]
|
|
|
|
mean value: 0.8174671785916895
|
|
|
|
key: train_fscore
|
|
value: [0.83686177 0.9012436 0.88922842 0.81867799 0.88958009 0.87229276
|
|
0.75484228 0.80720721 0.90380313 0.78668942]
|
|
|
|
mean value: 0.8460426660905662
|
|
|
|
key: test_precision
|
|
value: [0.70588235 0.89855072 0.92307692 0.68518519 0.83098592 0.94444444
|
|
0.58778626 0.65217391 0.8630137 0.98 ]
|
|
|
|
mean value: 0.8071099416993968
|
|
|
|
key: train_precision
|
|
value: [0.73202614 0.90721649 0.93719807 0.70239334 0.95492487 0.89570552
|
|
0.60892857 0.68711656 0.9280245 0.95247934]
|
|
|
|
mean value: 0.8306013419789139
|
|
|
|
key: test_recall
|
|
value: [0.94736842 0.81578947 0.78947368 0.97368421 0.76623377 0.88311688
|
|
1. 0.97402597 0.82894737 0.64473684]
|
|
|
|
mean value: 0.8623376623376623
|
|
|
|
key: train_recall
|
|
value: [0.97674419 0.89534884 0.84593023 0.98110465 0.83260553 0.85007278
|
|
0.99272198 0.97816594 0.88081395 0.67005814]
|
|
|
|
mean value: 0.8903566229985443
|
|
|
|
key: test_roc_auc
|
|
value: [0.77887902 0.86244019 0.86226931 0.76606288 0.80416951 0.91524265
|
|
0.64473684 0.72385509 0.84868421 0.81578947]
|
|
|
|
mean value: 0.8022129186602871
|
|
|
|
key: train_roc_auc
|
|
value: [0.80933279 0.90182289 0.89458084 0.78240094 0.89668067 0.87561779
|
|
0.67804704 0.76669925 0.90625 0.81831395]
|
|
|
|
mean value: 0.8329746157882265
|
|
|
|
key: test_jcc
|
|
value: [0.67924528 0.74698795 0.74074074 0.67272727 0.66292135 0.83950617
|
|
0.58778626 0.64102564 0.73255814 0.63636364]
|
|
|
|
mean value: 0.6939862445914369
|
|
|
|
key: train_jcc
|
|
value: [0.71948608 0.82023968 0.80055021 0.69301848 0.80112045 0.77350993
|
|
0.60622222 0.67673716 0.8244898 0.64838256]
|
|
|
|
mean value: 0.736375656860724
|
|
|
|
MCC on Blind test: 0.25
|
|
|
|
Accuracy on Blind test: 0.76
|
|
|
|
Model_name: AdaBoost Classifier
|
|
Model func: AdaBoostClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'mcsm_ppi2_affinity',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=169)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', AdaBoostClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.39936137 0.39739537 0.40303993 0.38758874 0.38009048 0.3800745
|
|
0.38136864 0.37947059 0.37942672 0.38146639]
|
|
|
|
mean value: 0.38692827224731446
|
|
|
|
key: score_time
|
|
value: [0.01705122 0.01843405 0.01854253 0.01659775 0.01685739 0.01700711
|
|
0.0167098 0.01705623 0.01666355 0.01681781]
|
|
|
|
mean value: 0.01717374324798584
|
|
|
|
key: test_mcc
|
|
value: [0.86959495 0.88243336 0.79114682 0.89668196 0.79751735 0.9353409
|
|
0.87398511 0.89542037 0.89504682 0.88226658]
|
|
|
|
mean value: 0.8719434212539181
|
|
|
|
key: train_mcc
|
|
value: [0.91328169 0.91148825 0.91743964 0.90183623 0.92326323 0.91499875
|
|
0.92334159 0.91170005 0.91475549 0.90437934]
|
|
|
|
mean value: 0.913648426310073
|
|
|
|
key: test_accuracy
|
|
value: [0.93464052 0.94117647 0.89542484 0.94771242 0.89542484 0.96732026
|
|
0.93464052 0.94771242 0.94736842 0.94078947]
|
|
|
|
mean value: 0.9352210182318541
|
|
|
|
key: train_accuracy
|
|
value: [0.95636364 0.95563636 0.95854545 0.95054545 0.96145455 0.95709091
|
|
0.96145455 0.95563636 0.95712209 0.95203488]
|
|
|
|
mean value: 0.9565884249471459
|
|
|
|
key: test_fscore
|
|
value: [0.93506494 0.94117647 0.8961039 0.94871795 0.90243902 0.96815287
|
|
0.9382716 0.94805195 0.94805195 0.93959732]
|
|
|
|
mean value: 0.9365627957585706
|
|
|
|
key: train_fscore
|
|
value: [0.95714286 0.95614666 0.95913978 0.95156695 0.96195262 0.95794726
|
|
0.96200717 0.9562724 0.95782702 0.95265423]
|
|
|
|
mean value: 0.9572656948478266
|
|
|
|
key: test_precision
|
|
value: [0.92307692 0.93506494 0.88461538 0.925 0.85057471 0.95
|
|
0.89411765 0.94805195 0.93589744 0.95890411]
|
|
|
|
mean value: 0.920530309599817
|
|
|
|
key: train_precision
|
|
value: [0.94101124 0.94594595 0.94625177 0.93296089 0.9490085 0.93854749
|
|
0.94774011 0.9420904 0.94233474 0.94050992]
|
|
|
|
mean value: 0.942640099169862
|
|
|
|
key: test_recall
|
|
value: [0.94736842 0.94736842 0.90789474 0.97368421 0.96103896 0.98701299
|
|
0.98701299 0.94805195 0.96052632 0.92105263]
|
|
|
|
mean value: 0.9541011619958988
|
|
|
|
key: train_recall
|
|
value: [0.97383721 0.96656977 0.97238372 0.97093023 0.97525473 0.97816594
|
|
0.97671033 0.97088792 0.97383721 0.96511628]
|
|
|
|
mean value: 0.9723693341457635
|
|
|
|
key: test_roc_auc
|
|
value: [0.93472317 0.94121668 0.89550581 0.94788107 0.89499316 0.9671907
|
|
0.93429597 0.94771018 0.94736842 0.94078947]
|
|
|
|
mean value: 0.9351674641148325
|
|
|
|
key: train_roc_auc
|
|
value: [0.95635092 0.95562841 0.95853538 0.95053062 0.96146457 0.95710623
|
|
0.96146563 0.95564745 0.95712209 0.95203488]
|
|
|
|
mean value: 0.9565886183609221
|
|
|
|
key: test_jcc
|
|
value: [0.87804878 0.88888889 0.81176471 0.90243902 0.82222222 0.9382716
|
|
0.88372093 0.90123457 0.90123457 0.88607595]
|
|
|
|
mean value: 0.88139012422119
|
|
|
|
key: train_jcc
|
|
value: [0.91780822 0.91597796 0.9214876 0.9076087 0.92669433 0.91928865
|
|
0.92679558 0.91620879 0.91906722 0.90958904]
|
|
|
|
mean value: 0.9180526082222029
|
|
|
|
MCC on Blind test: 0.67
|
|
|
|
Accuracy on Blind test: 0.86
|
|
|
|
Model_name: Bagging Classifier
|
|
Model func: BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'mcsm_ppi2_affinity',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=169)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
BaggingClassifier(n_jobs=10, oob_score=True,
|
|
random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.23837304 0.25315309 0.26155829 0.26193166 0.26441669 0.25714302
|
|
0.26143622 0.26356649 0.25145388 0.25725651]
|
|
|
|
mean value: 0.2570288896560669
|
|
|
|
key: score_time
|
|
value: [0.04023337 0.03893018 0.03958535 0.04100394 0.03667855 0.03563356
|
|
0.02932739 0.04651427 0.04040456 0.04004693]
|
|
|
|
mean value: 0.03883581161499024
|
|
|
|
key: test_mcc
|
|
value: [0.90042249 0.92189371 0.84247383 0.89668196 0.81558518 0.90916914
|
|
0.88599925 0.90916914 0.89753825 0.94736842]
|
|
|
|
mean value: 0.8926301374021354
|
|
|
|
key: train_mcc
|
|
value: [0.99564583 0.99563741 0.99564583 0.99564583 0.99272833 0.99272833
|
|
0.99563742 0.98981921 0.99420285 0.99127907]
|
|
|
|
mean value: 0.9938970095220991
|
|
|
|
key: test_accuracy
|
|
value: [0.94771242 0.96078431 0.91503268 0.94771242 0.90196078 0.95424837
|
|
0.94117647 0.95424837 0.94736842 0.97368421]
|
|
|
|
mean value: 0.9443928448572412
|
|
|
|
key: train_accuracy
|
|
value: [0.99781818 0.99781818 0.99781818 0.99781818 0.99636364 0.99636364
|
|
0.99781818 0.99490909 0.99709302 0.99563953]
|
|
|
|
mean value: 0.9969459830866807
|
|
|
|
key: test_fscore
|
|
value: [0.95 0.96103896 0.92121212 0.94871795 0.91017964 0.95541401
|
|
0.94409938 0.95541401 0.94936709 0.97368421]
|
|
|
|
mean value: 0.94691273751812
|
|
|
|
key: train_fscore
|
|
value: [0.99782451 0.99782135 0.99782451 0.99782451 0.99636364 0.99636364
|
|
0.99781818 0.99490168 0.99710145 0.99563953]
|
|
|
|
mean value: 0.9969482996175537
|
|
|
|
key: test_precision
|
|
value: [0.9047619 0.94871795 0.85393258 0.925 0.84444444 0.9375
|
|
0.9047619 0.9375 0.91463415 0.97368421]
|
|
|
|
mean value: 0.9144937143823645
|
|
|
|
key: train_precision
|
|
value: [0.99565847 0.99709724 0.99565847 0.99565847 0.99563953 0.99563953
|
|
0.99709302 0.99562682 0.99421965 0.99563953]
|
|
|
|
mean value: 0.9957930743597814
|
|
|
|
key: test_recall
|
|
value: [1. 0.97368421 1. 0.97368421 0.98701299 0.97402597
|
|
0.98701299 0.97402597 0.98684211 0.97368421]
|
|
|
|
mean value: 0.9829972658920028
|
|
|
|
key: train_recall
|
|
value: [1. 0.99854651 1. 1. 0.99708879 0.99708879
|
|
0.9985444 0.99417758 1. 0.99563953]
|
|
|
|
mean value: 0.9981085609830405
|
|
|
|
key: test_roc_auc
|
|
value: [0.94805195 0.96086808 0.91558442 0.94788107 0.90140123 0.95411825
|
|
0.94087491 0.95411825 0.94736842 0.97368421]
|
|
|
|
mean value: 0.9443950786056049
|
|
|
|
key: train_roc_auc
|
|
value: [0.99781659 0.99781765 0.99781659 0.99781659 0.99636416 0.99636416
|
|
0.99781871 0.99490856 0.99709302 0.99563953]
|
|
|
|
mean value: 0.9969455587150062
|
|
|
|
key: test_jcc
|
|
value: [0.9047619 0.925 0.85393258 0.90243902 0.83516484 0.91463415
|
|
0.89411765 0.91463415 0.90361446 0.94871795]
|
|
|
|
mean value: 0.8997016694877671
|
|
|
|
key: train_jcc
|
|
value: [0.99565847 0.99565217 0.99565847 0.99565847 0.99275362 0.99275362
|
|
0.99564586 0.98985507 0.99421965 0.99131693]
|
|
|
|
mean value: 0.993917233945979
|
|
|
|
MCC on Blind test: 0.64
|
|
|
|
Accuracy on Blind test: 0.84
|
|
|
|
Model_name: Gaussian Process
|
|
Model func: GaussianProcessClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'mcsm_ppi2_affinity',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=169)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianProcessClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.92119527 0.849967 0.79599428 0.90053678 0.84074235 0.81148815
|
|
0.81688619 0.87066102 0.75244617 0.86435032]
|
|
|
|
mean value: 0.8424267530441284
|
|
|
|
key: score_time
|
|
value: [0.05906892 0.06310463 0.04293704 0.05982184 0.06441712 0.06345344
|
|
0.0613308 0.06919122 0.0425725 0.06088424]
|
|
|
|
mean value: 0.058678174018859865
|
|
|
|
key: test_mcc
|
|
value: [0.70444953 0.83891994 0.88243336 0.80967851 0.6478391 0.83345743
|
|
0.80055798 0.77934127 0.73450949 0.92105263]
|
|
|
|
mean value: 0.795223925225032
|
|
|
|
key: train_mcc
|
|
value: [0.95087046 0.9481207 0.9435602 0.94521005 0.94058845 0.94214222
|
|
0.94521258 0.94945454 0.95381487 0.94666202]
|
|
|
|
mean value: 0.9465636083608279
|
|
|
|
key: test_accuracy
|
|
value: [0.8496732 0.91503268 0.94117647 0.90196078 0.81699346 0.91503268
|
|
0.89542484 0.88888889 0.86184211 0.96052632]
|
|
|
|
mean value: 0.8946551427588579
|
|
|
|
key: train_accuracy
|
|
value: [0.97527273 0.97381818 0.97163636 0.97236364 0.97018182 0.97090909
|
|
0.97236364 0.97454545 0.97674419 0.97311047]
|
|
|
|
mean value: 0.97309455602537
|
|
|
|
key: test_fscore
|
|
value: [0.85714286 0.9202454 0.94117647 0.9068323 0.83529412 0.91925466
|
|
0.90361446 0.89308176 0.87272727 0.96052632]
|
|
|
|
mean value: 0.9009895608027257
|
|
|
|
key: train_fscore
|
|
value: [0.97560976 0.97424893 0.97200287 0.97281831 0.97048236 0.97126437
|
|
0.97277937 0.97487437 0.97704448 0.97351467]
|
|
|
|
mean value: 0.9734639487853827
|
|
|
|
key: test_precision
|
|
value: [0.81176471 0.86206897 0.93506494 0.85882353 0.76344086 0.88095238
|
|
0.84269663 0.86585366 0.80898876 0.96052632]
|
|
|
|
mean value: 0.8590180744628214
|
|
|
|
key: train_precision
|
|
value: [0.9631728 0.95915493 0.96028369 0.95774648 0.96011396 0.95886525
|
|
0.95768688 0.96175637 0.96458924 0.95909732]
|
|
|
|
mean value: 0.9602466921435573
|
|
|
|
key: test_recall
|
|
value: [0.90789474 0.98684211 0.94736842 0.96052632 0.92207792 0.96103896
|
|
0.97402597 0.92207792 0.94736842 0.96052632]
|
|
|
|
mean value: 0.9489747095010252
|
|
|
|
key: train_recall
|
|
value: [0.98837209 0.98982558 0.98401163 0.98837209 0.98107715 0.98398836
|
|
0.98835517 0.98835517 0.98982558 0.98837209]
|
|
|
|
mean value: 0.9870554906739786
|
|
|
|
key: test_roc_auc
|
|
value: [0.85005126 0.91549897 0.94121668 0.90234108 0.81630212 0.91473001
|
|
0.89490772 0.88867054 0.86184211 0.96052632]
|
|
|
|
mean value: 0.8946086807928912
|
|
|
|
key: train_roc_auc
|
|
value: [0.97526319 0.97380653 0.97162736 0.97235199 0.97018974 0.9709186
|
|
0.97237526 0.97455549 0.97674419 0.97311047]
|
|
|
|
mean value: 0.9730942799837514
|
|
|
|
key: test_jcc
|
|
value: [0.75 0.85227273 0.88888889 0.82954545 0.71717172 0.85057471
|
|
0.82417582 0.80681818 0.77419355 0.92405063]
|
|
|
|
mean value: 0.8217691687814961
|
|
|
|
key: train_jcc
|
|
value: [0.95238095 0.94979079 0.94553073 0.94707521 0.94265734 0.94413408
|
|
0.94700139 0.95098039 0.95511921 0.94839609]
|
|
|
|
mean value: 0.9483066199683164
|
|
|
|
MCC on Blind test: 0.5
|
|
|
|
Accuracy on Blind test: 0.79
|
|
|
|
Model_name: Gradient Boosting
|
|
Model func: GradientBoostingClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'mcsm_ppi2_affinity',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=169)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GradientBoostingClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.7783401 1.80123258 1.77555442 1.71916199 1.76687956 1.7351656
|
|
1.75510836 1.73693585 1.73637676 1.75813794]
|
|
|
|
mean value: 1.7562893152236938
|
|
|
|
key: score_time
|
|
value: [0.01149559 0.01082087 0.00985169 0.01020694 0.00983787 0.01080632
|
|
0.0098505 0.01017809 0.01009703 0.00981355]
|
|
|
|
mean value: 0.010295844078063965
|
|
|
|
key: test_mcc
|
|
value: [0.89823836 0.90921537 0.83120941 0.92189371 0.79506337 0.91227016
|
|
0.87398511 0.92186711 0.90986594 0.96060947]
|
|
|
|
mean value: 0.8934218010896575
|
|
|
|
key: train_mcc
|
|
value: [0.97841437 0.97117163 0.96558461 0.97266054 0.96968584 0.97550531
|
|
0.97699226 0.97253788 0.9726807 0.97403889]
|
|
|
|
mean value: 0.9729272038929038
|
|
|
|
key: test_accuracy
|
|
value: [0.94771242 0.95424837 0.90849673 0.96078431 0.89542484 0.95424837
|
|
0.93464052 0.96078431 0.95394737 0.98026316]
|
|
|
|
mean value: 0.9450550395596835
|
|
|
|
key: train_accuracy
|
|
value: [0.98909091 0.98545455 0.98254545 0.98618182 0.98472727 0.98763636
|
|
0.98836364 0.98618182 0.98619186 0.9869186 ]
|
|
|
|
mean value: 0.9863292283298097
|
|
|
|
key: test_fscore
|
|
value: [0.94936709 0.95483871 0.91566265 0.96103896 0.90123457 0.95652174
|
|
0.9382716 0.96153846 0.95541401 0.98039216]
|
|
|
|
mean value: 0.9474279953036386
|
|
|
|
key: train_fscore
|
|
value: [0.98921639 0.98563218 0.98283262 0.98636037 0.98488121 0.98776098
|
|
0.98848921 0.98630137 0.98636037 0.98705036]
|
|
|
|
mean value: 0.9864885066442688
|
|
|
|
key: test_precision
|
|
value: [0.91463415 0.93670886 0.84444444 0.94871795 0.85882353 0.91666667
|
|
0.89411765 0.94936709 0.92592593 0.97402597]
|
|
|
|
mean value: 0.91634322319601
|
|
|
|
key: train_precision
|
|
value: [0.97866287 0.97443182 0.96760563 0.97446809 0.97435897 0.97720798
|
|
0.9772404 0.97714286 0.97446809 0.97720798]
|
|
|
|
mean value: 0.9752794679807932
|
|
|
|
key: test_recall
|
|
value: [0.98684211 0.97368421 1. 0.97368421 0.94805195 1.
|
|
0.98701299 0.97402597 0.98684211 0.98684211]
|
|
|
|
mean value: 0.9816985645933014
|
|
|
|
key: train_recall
|
|
value: [1. 0.99709302 0.99854651 0.99854651 0.99563319 0.9985444
|
|
1. 0.99563319 0.99854651 0.99709302]
|
|
|
|
mean value: 0.9979636352865509
|
|
|
|
key: test_roc_auc
|
|
value: [0.94796651 0.95437457 0.90909091 0.96086808 0.89507861 0.95394737
|
|
0.93429597 0.9606972 0.95394737 0.98026316]
|
|
|
|
mean value: 0.9450529733424471
|
|
|
|
key: train_roc_auc
|
|
value: [0.98908297 0.98544607 0.98253381 0.98617282 0.9847352 0.98764429
|
|
0.98837209 0.98618869 0.98619186 0.9869186 ]
|
|
|
|
mean value: 0.9863286407027521
|
|
|
|
key: test_jcc
|
|
value: [0.90361446 0.91358025 0.84444444 0.925 0.82022472 0.91666667
|
|
0.88372093 0.92592593 0.91463415 0.96153846]
|
|
|
|
mean value: 0.900934999899555
|
|
|
|
key: train_jcc
|
|
value: [0.97866287 0.97167139 0.96624473 0.97308782 0.97021277 0.97581792
|
|
0.9772404 0.97297297 0.97308782 0.97443182]
|
|
|
|
mean value: 0.9733430503225475
|
|
|
|
MCC on Blind test: 0.67
|
|
|
|
Accuracy on Blind test: 0.86
|
|
|
|
Model_name: QDA
|
|
Model func: QuadraticDiscriminantAnalysis()
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'mcsm_ppi2_affinity',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=169)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', QuadraticDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.04682612 0.04779935 0.04758477 0.04885864 0.04845262 0.04762435
|
|
0.04974437 0.04802918 0.04707122 0.04721093]
|
|
|
|
mean value: 0.04792015552520752
|
|
|
|
key: score_time
|
|
value: [0.01351118 0.01357889 0.01370478 0.01355672 0.01368284 0.01349187
|
|
0.01389933 0.01360774 0.01363087 0.01369047]
|
|
|
|
mean value: 0.013635468482971192
|
|
|
|
key: test_mcc
|
|
value: [0.17530052 0.23335786 0.26272017 0.19281458 0.18500894 0.26617701
|
|
0.26617701 0.30672292 0.21229796 0.21971769]
|
|
|
|
mean value: 0.23202946430175564
|
|
|
|
key: train_mcc
|
|
value: [0.26008634 0.25201333 0.24373835 0.24707391 0.25164703 0.24338408
|
|
0.24505381 0.24001761 0.25173279 0.248452 ]
|
|
|
|
mean value: 0.24831992556733573
|
|
|
|
key: test_accuracy
|
|
value: [0.54248366 0.54901961 0.5620915 0.54248366 0.53594771 0.56862745
|
|
0.56862745 0.58823529 0.55263158 0.54605263]
|
|
|
|
mean value: 0.5556200550395597
|
|
|
|
key: train_accuracy
|
|
value: [0.56363636 0.56 0.55636364 0.55781818 0.55927273 0.55563636
|
|
0.55636364 0.55418182 0.55959302 0.55813953]
|
|
|
|
mean value: 0.5581005285412262
|
|
|
|
key: test_fscore
|
|
value: [0.67889908 0.68778281 0.69406393 0.68181818 0.68444444 0.7
|
|
0.7 0.70967742 0.68807339 0.68778281]
|
|
|
|
mean value: 0.6912542060482053
|
|
|
|
key: train_fscore
|
|
value: [0.69635628 0.69459869 0.69284995 0.69354839 0.69393939 0.69219144
|
|
0.69254032 0.69149472 0.69424823 0.69354839]
|
|
|
|
mean value: 0.6935315788723275
|
|
|
|
key: test_precision
|
|
value: [0.52112676 0.52413793 0.53146853 0.52083333 0.52027027 0.53846154
|
|
0.53846154 0.55 0.52816901 0.52413793]
|
|
|
|
mean value: 0.5297066848712065
|
|
|
|
key: train_precision
|
|
value: [0.53416149 0.5320959 0.53004622 0.5308642 0.53132251 0.52927581
|
|
0.52968389 0.52846154 0.5316847 0.5308642 ]
|
|
|
|
mean value: 0.5308460449410161
|
|
|
|
key: test_recall
|
|
value: [0.97368421 1. 1. 0.98684211 1. 1.
|
|
1. 1. 0.98684211 1. ]
|
|
|
|
mean value: 0.9947368421052631
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.54528366 0.55194805 0.56493506 0.5453691 0.53289474 0.56578947
|
|
0.56578947 0.58552632 0.55263158 0.54605263]
|
|
|
|
mean value: 0.555622009569378
|
|
|
|
key: train_roc_auc
|
|
value: [0.56331878 0.55967977 0.55604076 0.55749636 0.55959302 0.5559593
|
|
0.55668605 0.55450581 0.55959302 0.55813953]
|
|
|
|
mean value: 0.5581012406485901
|
|
|
|
key: test_jcc
|
|
value: [0.51388889 0.52413793 0.53146853 0.51724138 0.52027027 0.53846154
|
|
0.53846154 0.55 0.52447552 0.52413793]
|
|
|
|
mean value: 0.5282543533405603
|
|
|
|
key: train_jcc
|
|
value: [0.53416149 0.5320959 0.53004622 0.5308642 0.53132251 0.52927581
|
|
0.52968389 0.52846154 0.5316847 0.5308642 ]
|
|
|
|
mean value: 0.5308460449410161
|
|
|
|
MCC on Blind test: 0.06
|
|
|
|
Accuracy on Blind test: 0.34
|
|
|
|
Model_name: Ridge Classifier
|
|
Model func: RidgeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'mcsm_ppi2_affinity',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=169)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.03238416 0.04225111 0.04231215 0.04227066 0.04233122 0.04200482
|
|
0.04231119 0.04217577 0.04202938 0.04259181]
|
|
|
|
mean value: 0.041266226768493654
|
|
|
|
key: score_time
|
|
value: [0.02246475 0.0250268 0.02002358 0.02009606 0.02007437 0.01997685
|
|
0.01998591 0.01998043 0.02005959 0.02007389]
|
|
|
|
mean value: 0.02077622413635254
|
|
|
|
key: test_mcc
|
|
value: [0.66398432 0.7271493 0.73965143 0.74806111 0.6415549 0.8562639
|
|
0.67107077 0.75312909 0.79056942 0.89597867]
|
|
|
|
mean value: 0.7487412926040657
|
|
|
|
key: train_mcc
|
|
value: [0.78479944 0.79062931 0.7846599 0.78064776 0.78788129 0.7746894
|
|
0.77746184 0.78343419 0.78987936 0.77101661]
|
|
|
|
mean value: 0.7825099107682967
|
|
|
|
key: test_accuracy
|
|
value: [0.83006536 0.8627451 0.86928105 0.86928105 0.81699346 0.92810458
|
|
0.83006536 0.87581699 0.89473684 0.94736842]
|
|
|
|
mean value: 0.8724458204334365
|
|
|
|
key: train_accuracy
|
|
value: [0.89163636 0.89454545 0.89163636 0.88945455 0.89309091 0.88654545
|
|
0.888 0.89090909 0.89389535 0.88444767]
|
|
|
|
mean value: 0.8904161205073996
|
|
|
|
key: test_fscore
|
|
value: [0.8375 0.86624204 0.87179487 0.87804878 0.8313253 0.92903226
|
|
0.8452381 0.88050314 0.8974359 0.94594595]
|
|
|
|
mean value: 0.87830663330426
|
|
|
|
key: train_fscore
|
|
value: [0.89499648 0.89781536 0.89484827 0.8931083 0.89640592 0.8899859
|
|
0.89124294 0.89421721 0.89761571 0.88857744]
|
|
|
|
mean value: 0.8938813512420467
|
|
|
|
key: test_precision
|
|
value: [0.79761905 0.83950617 0.85 0.81818182 0.7752809 0.92307692
|
|
0.78021978 0.85365854 0.875 0.97222222]
|
|
|
|
mean value: 0.8484765399621068
|
|
|
|
key: train_precision
|
|
value: [0.86867305 0.87140903 0.8696845 0.86512262 0.86885246 0.86320109
|
|
0.86556927 0.86730506 0.86720867 0.8579161 ]
|
|
|
|
mean value: 0.8664941857333534
|
|
|
|
key: test_recall
|
|
value: [0.88157895 0.89473684 0.89473684 0.94736842 0.8961039 0.93506494
|
|
0.92207792 0.90909091 0.92105263 0.92105263]
|
|
|
|
mean value: 0.9122863978127136
|
|
|
|
key: train_recall
|
|
value: [0.92296512 0.92587209 0.92151163 0.92296512 0.92576419 0.91848617
|
|
0.91848617 0.92285298 0.93023256 0.92151163]
|
|
|
|
mean value: 0.9230647659185538
|
|
|
|
key: test_roc_auc
|
|
value: [0.83039986 0.86295284 0.86944634 0.86978811 0.816473 0.92805878
|
|
0.82946001 0.87559809 0.89473684 0.94736842]
|
|
|
|
mean value: 0.8724282296650718
|
|
|
|
key: train_roc_auc
|
|
value: [0.89161356 0.89452265 0.89161462 0.88943016 0.89311465 0.88656867
|
|
0.88802216 0.89093231 0.89389535 0.88444767]
|
|
|
|
mean value: 0.890416180054839
|
|
|
|
key: test_jcc
|
|
value: [0.72043011 0.76404494 0.77272727 0.7826087 0.71134021 0.86746988
|
|
0.73195876 0.78651685 0.81395349 0.8974359 ]
|
|
|
|
mean value: 0.7848486108057365
|
|
|
|
key: train_jcc
|
|
value: [0.80994898 0.81457801 0.80970626 0.8068615 0.81226054 0.80177891
|
|
0.80382166 0.80867347 0.81424936 0.79949559]
|
|
|
|
mean value: 0.808137426138211
|
|
|
|
MCC on Blind test: 0.62
|
|
|
|
Accuracy on Blind test: 0.83
|
|
|
|
Model_name: Ridge ClassifierCV
|
|
Model func: RidgeClassifierCV(cv=10)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'mcsm_ppi2_affinity',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=169)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifierCV(cv=10))])
|
|
|
|
key: fit_time
|
|
value: [0.46023178 0.41017795 0.40954113 0.44649363 0.41236901 0.44450688
|
|
0.40110087 0.4120388 0.40145922 0.4024899 ]
|
|
|
|
mean value: 0.4200409173965454
|
|
|
|
key: score_time
|
|
value: [0.01972604 0.0197587 0.01972723 0.01971292 0.01977825 0.01973319
|
|
0.01967406 0.01975846 0.01968694 0.01958704]
|
|
|
|
mean value: 0.01971428394317627
|
|
|
|
key: test_mcc
|
|
value: [0.69246095 0.7271493 0.76499745 0.74806111 0.6415549 0.87042236
|
|
0.68612386 0.75312909 0.77692131 0.85593026]
|
|
|
|
mean value: 0.7516750603742108
|
|
|
|
key: train_mcc
|
|
value: [0.7993741 0.79062931 0.79215976 0.78064776 0.78788129 0.79678309
|
|
0.78620506 0.78343419 0.79814553 0.79093784]
|
|
|
|
mean value: 0.7906197939099966
|
|
|
|
key: test_accuracy
|
|
value: [0.84313725 0.8627451 0.88235294 0.86928105 0.81699346 0.93464052
|
|
0.83660131 0.87581699 0.88815789 0.92763158]
|
|
|
|
mean value: 0.8737358101135191
|
|
|
|
key: train_accuracy
|
|
value: [0.89890909 0.89454545 0.89527273 0.88945455 0.89309091 0.89745455
|
|
0.89236364 0.89090909 0.89825581 0.89462209]
|
|
|
|
mean value: 0.8944877906976744
|
|
|
|
key: test_fscore
|
|
value: [0.85185185 0.86624204 0.88311688 0.87804878 0.8313253 0.93670886
|
|
0.85207101 0.88050314 0.89032258 0.9261745 ]
|
|
|
|
mean value: 0.8796364943498117
|
|
|
|
key: train_fscore
|
|
value: /home/tanu/git/LSHTM_analysis/scripts/ml/./rpob_cd_sl.py:196: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
rouC_CT.sort_values(by = ['test_mcc'], ascending = False, inplace = True)
|
|
/home/tanu/git/LSHTM_analysis/scripts/ml/./rpob_cd_sl.py:199: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
rouC_BT.sort_values(by = ['bts_mcc'], ascending = False, inplace = True)
|
|
[0.90204369 0.89781536 0.89859155 0.8931083 0.89640592 0.9007741
|
|
0.89548023 0.89421721 0.90140845 0.89795918]
|
|
|
|
mean value: 0.8977803993247322
|
|
|
|
key: test_precision
|
|
value: [0.80232558 0.83950617 0.87179487 0.81818182 0.7752809 0.91358025
|
|
0.7826087 0.85365854 0.87341772 0.94520548]
|
|
|
|
mean value: 0.8475560023210111
|
|
|
|
key: train_precision
|
|
value: [0.875513 0.87140903 0.8715847 0.86512262 0.86885246 0.8719346
|
|
0.8696845 0.86730506 0.87431694 0.87039563]
|
|
|
|
mean value: 0.8706118538945805
|
|
|
|
key: test_recall
|
|
value: [0.90789474 0.89473684 0.89473684 0.94736842 0.8961039 0.96103896
|
|
0.93506494 0.90909091 0.90789474 0.90789474]
|
|
|
|
mean value: 0.9161825017088175
|
|
|
|
key: train_recall
|
|
value: [0.93023256 0.92587209 0.92732558 0.92296512 0.92576419 0.93158661
|
|
0.92285298 0.92285298 0.93023256 0.92732558]
|
|
|
|
mean value: 0.9267010256931045
|
|
|
|
key: test_roc_auc
|
|
value: [0.84355776 0.86295284 0.88243336 0.86978811 0.816473 0.93446685
|
|
0.83595352 0.87559809 0.88815789 0.92763158]
|
|
|
|
mean value: 0.8737012987012988
|
|
|
|
key: train_roc_auc
|
|
value: [0.89888629 0.89452265 0.8952494 0.88943016 0.89311465 0.89747935
|
|
0.89238579 0.89093231 0.89825581 0.89462209]
|
|
|
|
mean value: 0.8944878516299379
|
|
|
|
key: test_jcc
|
|
value: [0.74193548 0.76404494 0.79069767 0.7826087 0.71134021 0.88095238
|
|
0.74226804 0.78651685 0.80232558 0.8625 ]
|
|
|
|
mean value: 0.7865189861464965
|
|
|
|
key: train_jcc
|
|
value: [0.82156611 0.81457801 0.81585678 0.8068615 0.81226054 0.81946223
|
|
0.81074169 0.80867347 0.82051282 0.81481481]
|
|
|
|
mean value: 0.8145327949377648
|
|
|
|
MCC on Blind test: 0.62
|
|
|
|
Accuracy on Blind test: 0.83
|