19946 lines
992 KiB
Text
19946 lines
992 KiB
Text
/home/tanu/git/LSHTM_analysis/scripts/ml/ml_data_cd_8020.py:548: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
mask_check.sort_values(by = ['ligand_distance'], ascending = True, inplace = True)
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/xgboost/compat.py:36: FutureWarning: pandas.Int64Index is deprecated and will be removed from pandas in a future version. Use pandas.Index with the appropriate dtype instead.
|
|
from pandas import MultiIndex, Int64Index
|
|
1.22.4
|
|
1.4.1
|
|
|
|
aaindex_df contains non-numerical data
|
|
|
|
Total no. of non-numerial columns: 2
|
|
|
|
Selecting numerical data only
|
|
|
|
PASS: successfully selected numerical columns only for aaindex_df
|
|
|
|
Now checking for NA in the remaining aaindex_cols
|
|
|
|
Counting aaindex_df cols with NA
|
|
ncols with NA: 4 columns
|
|
Dropping these...
|
|
Original ncols: 127
|
|
|
|
Revised df ncols: 123
|
|
|
|
Checking NA in revised df...
|
|
|
|
PASS: cols with NA successfully dropped from aaindex_df
|
|
Proceeding with combining aa_df with other features_df
|
|
|
|
PASS: ncols match
|
|
Expected ncols: 123
|
|
Got: 123
|
|
|
|
Total no. of columns in clean aa_df: 123
|
|
|
|
Proceeding to merge, expected nrows in merged_df: 817
|
|
|
|
PASS: my_features_df and aa_df successfully combined
|
|
nrows: 817
|
|
ncols: 269
|
|
count of NULL values before imputation
|
|
|
|
or_mychisq 244
|
|
log10_or_mychisq 244
|
|
dtype: int64
|
|
count of NULL values AFTER imputation
|
|
|
|
mutationinformation 0
|
|
or_rawI 0
|
|
logorI 0
|
|
dtype: int64
|
|
|
|
PASS: OR values imputed, data ready for ML
|
|
|
|
Total no. of features for aaindex: 123
|
|
|
|
No. of numerical features: 168
|
|
No. of categorical features: 7
|
|
|
|
PASS: x_features has no target variable
|
|
|
|
No. of columns for x_features: 175
|
|
|
|
-------------------------------------------------------------
|
|
Successfully split data with stratification [COMPLETE data]: 80/20
|
|
Original data size: (817, 175)
|
|
Train data size: (653, 175)
|
|
Test data size: (164, 175)
|
|
y_train numbers: Counter({0: 378, 1: 275})
|
|
y_train ratio: 1.3745454545454545
|
|
|
|
y_test_numbers: Counter({0: 95, 1: 69})
|
|
y_test ratio: 1.3768115942028984
|
|
-------------------------------------------------------------
|
|
|
|
index: 0
|
|
ind: 1
|
|
|
|
Mask count check: True
|
|
|
|
index: 1
|
|
ind: 2
|
|
|
|
Mask count check: True
|
|
Original Data
|
|
Counter({0: 378, 1: 275}) Data dim: (653, 175)
|
|
|
|
Simple Random OverSampling
|
|
Counter({0: 378, 1: 378})
|
|
(756, 175)
|
|
|
|
Simple Random UnderSampling
|
|
Counter({0: 275, 1: 275})
|
|
(550, 175)
|
|
|
|
Simple Combined Over and UnderSampling
|
|
Counter({0: 378, 1: 378})
|
|
(756, 175)
|
|
|
|
SMOTE_NC OverSampling
|
|
Counter({0: 378, 1: 378})
|
|
(756, 175)
|
|
|
|
#####################################################################
|
|
|
|
Running ML analysis [COMPLETE DATA]: 80/20 split
|
|
Gene name: katG
|
|
Drug name: isoniazid
|
|
|
|
Output directory: /home/tanu/git/Data/isoniazid/output/ml/tts_cd_8020/
|
|
|
|
Sanity checks:
|
|
Total input features: 175
|
|
|
|
Training data size: (653, 175)
|
|
Test data size: (164, 175)
|
|
|
|
Target feature numbers (training data): Counter({0: 378, 1: 275})
|
|
Target features ratio (training data: 1.3745454545454545
|
|
|
|
Target feature numbers (test data): Counter({0: 95, 1: 69})
|
|
Target features ratio (test data): 1.3768115942028984
|
|
|
|
#####################################################################
|
|
|
|
|
|
================================================================
|
|
|
|
Strucutral features (n): 36
|
|
These are:
|
|
Common stablity features: ['ligand_distance', 'ligand_affinity_change', 'duet_stability_change', 'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'mcsm_ppi2_affinity', 'interface_dist']
|
|
FoldX columns: ['electro_rr', 'electro_mm', 'electro_sm', 'electro_ss', 'disulfide_rr', 'disulfide_mm', 'disulfide_sm', 'disulfide_ss', 'hbonds_rr', 'hbonds_mm', 'hbonds_sm', 'hbonds_ss', 'partcov_rr', 'partcov_mm', 'partcov_sm', 'partcov_ss', 'vdwclashes_rr', 'vdwclashes_mm', 'vdwclashes_sm', 'vdwclashes_ss', 'volumetric_rr', 'volumetric_mm', 'volumetric_ss']
|
|
Other struc columns: ['rsa', 'kd_values', 'rd_values']
|
|
================================================================
|
|
|
|
AAindex features (n): 123
|
|
These are:
|
|
['ALTS910101', 'AZAE970101', 'AZAE970102', 'BASU010101', 'BENS940101', 'BENS940102', 'BENS940103', 'BENS940104', 'BETM990101', 'BLAJ010101', 'BONM030101', 'BONM030102', 'BONM030103', 'BONM030104', 'BONM030105', 'BONM030106', 'BRYS930101', 'CROG050101', 'CSEM940101', 'DAYM780301', 'DAYM780302', 'DOSZ010101', 'DOSZ010102', 'DOSZ010103', 'DOSZ010104', 'FEND850101', 'FITW660101', 'GEOD900101', 'GIAG010101', 'GONG920101', 'GRAR740104', 'HENS920101', 'HENS920102', 'HENS920103', 'HENS920104', 'JOHM930101', 'JOND920103', 'JOND940101', 'KANM000101', 'KAPO950101', 'KESO980101', 'KESO980102', 'KOLA920101', 'KOLA930101', 'KOSJ950100_RSA_SST', 'KOSJ950100_SST', 'KOSJ950110_RSA', 'KOSJ950115', 'LEVJ860101', 'LINK010101', 'LIWA970101', 'LUTR910101', 'LUTR910102', 'LUTR910103', 'LUTR910104', 'LUTR910105', 'LUTR910106', 'LUTR910107', 'LUTR910108', 'LUTR910109', 'MCLA710101', 'MCLA720101', 'MEHP950102', 'MICC010101', 'MIRL960101', 'MIYS850102', 'MIYS850103', 'MIYS930101', 'MIYS960101', 'MIYS960102', 'MIYS960103', 'MIYS990106', 'MIYS990107', 'MIYT790101', 'MOHR870101', 'MOOG990101', 'MUET010101', 'MUET020101', 'MUET020102', 'NAOD960101', 'NGPC000101', 'NIEK910101', 'NIEK910102', 'OGAK980101', 'OVEJ920100_RSA', 'OVEJ920101', 'OVEJ920102', 'OVEJ920103', 'PRLA000101', 'PRLA000102', 'QUIB020101', 'QU_C930101', 'QU_C930102', 'QU_C930103', 'RIER950101', 'RISJ880101', 'RUSR970101', 'RUSR970102', 'RUSR970103', 'SIMK990101', 'SIMK990102', 'SIMK990103', 'SIMK990104', 'SIMK990105', 'SKOJ000101', 'SKOJ000102', 'SKOJ970101', 'TANS760101', 'TANS760102', 'THOP960101', 'TOBD000101', 'TOBD000102', 'TUDE900101', 'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101', 'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106']
|
|
================================================================
|
|
|
|
Evolutionary features (n): 3
|
|
These are:
|
|
['consurf_score', 'snap2_score', 'provean_score']
|
|
================================================================
|
|
|
|
Genomic features (n): 6
|
|
These are:
|
|
['maf', 'logorI']
|
|
['lineage_proportion', 'dist_lineage_proportion', 'lineage_count_all', 'lineage_count_unique']
|
|
================================================================
|
|
|
|
Categorical features (n): 7
|
|
These are:
|
|
['ss_class', 'aa_prop_change', 'electrostatics_change', 'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site']
|
|
================================================================
|
|
|
|
|
|
Pass: No. of features match
|
|
|
|
#####################################################################
|
|
|
|
|
|
Model_name: Logistic Regression
|
|
Model func: LogisticRegression(random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegression(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.09506774 0.12992549 0.14188218 0.0802834 0.07963109 0.12053657
|
|
0.08331847 0.07520509 0.08668733 0.08176041]
|
|
|
|
mean value: 0.09742977619171142
|
|
|
|
key: score_time
|
|
value: [0.03206778 0.03515911 0.02622914 0.02262688 0.01902652 0.01511431
|
|
0.02534556 0.0289166 0.01962161 0.02166963]
|
|
|
|
mean value: 0.02457771301269531
|
|
|
|
key: test_mcc
|
|
value: [0.66638826 0.59368892 0.62357561 0.5623401 0.68323587 0.49317739
|
|
0.52764927 0.50773831 0.6219883 0.62526656]
|
|
|
|
mean value: 0.5905048594307488
|
|
|
|
key: train_mcc
|
|
value: [0.67516017 0.67847106 0.69212657 0.67888818 0.68326442 0.7008631
|
|
0.7012731 0.69718926 0.72184372 0.70792501]
|
|
|
|
mean value: 0.6937004596112026
|
|
|
|
key: test_accuracy
|
|
value: [0.83333333 0.8030303 0.8030303 0.78461538 0.84615385 0.75384615
|
|
0.76923077 0.75384615 0.81538462 0.81538462]
|
|
|
|
mean value: 0.7977855477855478
|
|
|
|
key: train_accuracy
|
|
value: [0.84156729 0.84327087 0.85008518 0.84353741 0.8452381 0.8537415
|
|
0.8537415 0.85204082 0.86394558 0.85714286]
|
|
|
|
mean value: 0.8504311094113965
|
|
|
|
key: test_fscore
|
|
value: [0.81355932 0.75471698 0.79365079 0.75 0.81481481 0.7037037
|
|
0.72727273 0.72413793 0.76923077 0.76 ]
|
|
|
|
mean value: 0.7611087042873266
|
|
|
|
key: train_fscore
|
|
value: [0.81212121 0.81376518 0.82113821 0.81376518 0.81763527 0.828
|
|
0.82868526 0.8256513 0.84 0.832 ]
|
|
|
|
mean value: 0.8232761619986232
|
|
|
|
key: test_precision
|
|
value: [0.77419355 0.8 0.71428571 0.72413793 0.81481481 0.7037037
|
|
0.71428571 0.67741935 0.83333333 0.86363636]
|
|
|
|
mean value: 0.7619810478319933
|
|
|
|
key: train_precision
|
|
value: [0.81048387 0.81376518 0.8244898 0.81707317 0.812749 0.82142857
|
|
0.81889764 0.82071713 0.83003953 0.82213439]
|
|
|
|
mean value: 0.8191778277529544
|
|
|
|
key: test_recall
|
|
value: [0.85714286 0.71428571 0.89285714 0.77777778 0.81481481 0.7037037
|
|
0.74074074 0.77777778 0.71428571 0.67857143]
|
|
|
|
mean value: 0.7671957671957672
|
|
|
|
key: train_recall
|
|
value: [0.81376518 0.81376518 0.81781377 0.81048387 0.82258065 0.83467742
|
|
0.83870968 0.83064516 0.85020243 0.84210526]
|
|
|
|
mean value: 0.8274748596055896
|
|
|
|
key: test_roc_auc
|
|
value: [0.83646617 0.79135338 0.81484962 0.78362573 0.84161793 0.74658869
|
|
0.76510721 0.75730994 0.8030888 0.79874517]
|
|
|
|
mean value: 0.7938752662436873
|
|
|
|
key: train_roc_auc
|
|
value: [0.83776494 0.83923553 0.84567159 0.83906546 0.84217268 0.85116224
|
|
0.85170778 0.84914611 0.86205136 0.85507023]
|
|
|
|
mean value: 0.8473047922316373
|
|
|
|
key: test_jcc
|
|
value: [0.68571429 0.60606061 0.65789474 0.6 0.6875 0.54285714
|
|
0.57142857 0.56756757 0.625 0.61290323]
|
|
|
|
mean value: 0.615692613627673
|
|
|
|
key: train_jcc
|
|
value: [0.68367347 0.68600683 0.69655172 0.68600683 0.69152542 0.70648464
|
|
0.70748299 0.70307167 0.72413793 0.71232877]
|
|
|
|
mean value: 0.6997270274479856
|
|
|
|
MCC on Blind test: 0.39
|
|
|
|
Accuracy on Blind test: 0.71
|
|
|
|
Model_name: Logistic RegressionCV
|
|
Model func: LogisticRegressionCV(random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegressionCV(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [2.21130204 2.27629972 2.25938749 2.27832174 1.98243165 2.23027658
|
|
1.26302624 1.77205515 1.70650458 2.25391293]
|
|
|
|
mean value: 2.023351812362671
|
|
|
|
key: score_time
|
|
value: [0.02263093 0.03681421 0.01885271 0.02622771 0.03132725 0.02604389
|
|
0.012532 0.02827764 0.06136584 0.02293944]
|
|
|
|
mean value: 0.028701162338256835
|
|
|
|
key: test_mcc
|
|
value: [0.56578947 0.7518797 0.50934254 0.61988304 0.68323587 0.55653021
|
|
0.53471781 0.52764927 0.55808092 0.60216988]
|
|
|
|
mean value: 0.5909278719059823
|
|
|
|
key: train_mcc
|
|
value: [0.81874752 0.81540658 0.85001596 0.82751326 0.78719894 0.86180638
|
|
0.80137034 0.85712774 0.77717665 0.80204941]
|
|
|
|
mean value: 0.8198412791384827
|
|
|
|
key: test_accuracy
|
|
value: [0.78787879 0.87878788 0.75757576 0.81538462 0.84615385 0.78461538
|
|
0.76923077 0.76923077 0.78461538 0.8 ]
|
|
|
|
mean value: 0.7993473193473194
|
|
|
|
key: train_accuracy
|
|
value: [0.91141397 0.90971039 0.92674617 0.91496599 0.8962585 0.93197279
|
|
0.90306122 0.93027211 0.89115646 0.90306122]
|
|
|
|
mean value: 0.9118618827428757
|
|
|
|
key: test_fscore
|
|
value: [0.75 0.85714286 0.72413793 0.77777778 0.81481481 0.74074074
|
|
0.73684211 0.72727273 0.74074074 0.72340426]
|
|
|
|
mean value: 0.7592873950106448
|
|
|
|
key: train_fscore
|
|
value: [0.89558233 0.89378758 0.91348089 0.9015748 0.87676768 0.92094862
|
|
0.88531187 0.91750503 0.87148594 0.88622754]
|
|
|
|
mean value: 0.8962672276391245
|
|
|
|
key: test_precision
|
|
value: [0.75 0.85714286 0.7 0.77777778 0.81481481 0.74074074
|
|
0.7 0.71428571 0.76923077 0.89473684]
|
|
|
|
mean value: 0.7718729516097937
|
|
|
|
key: train_precision
|
|
value: [0.88844622 0.88492063 0.908 0.88076923 0.87854251 0.90310078
|
|
0.88353414 0.91566265 0.86454183 0.87401575]
|
|
|
|
mean value: 0.8881533733993977
|
|
|
|
key: test_recall
|
|
value: [0.75 0.85714286 0.75 0.77777778 0.81481481 0.74074074
|
|
0.77777778 0.74074074 0.71428571 0.60714286]
|
|
|
|
mean value: 0.7530423280423281
|
|
|
|
key: train_recall
|
|
value: [0.90283401 0.90283401 0.91902834 0.9233871 0.875 0.93951613
|
|
0.88709677 0.91935484 0.87854251 0.89878543]
|
|
|
|
mean value: 0.9046379130207653
|
|
|
|
key: test_roc_auc
|
|
value: [0.78289474 0.87593985 0.75657895 0.80994152 0.84161793 0.77826511
|
|
0.77046784 0.76510721 0.77606178 0.7765444 ]
|
|
|
|
mean value: 0.7933419321577216
|
|
|
|
key: train_roc_auc
|
|
value: [0.91024053 0.90876995 0.92569064 0.91610531 0.89338235 0.93299336
|
|
0.90090133 0.92879507 0.88941788 0.90247189]
|
|
|
|
mean value: 0.910876831273191
|
|
|
|
key: test_jcc
|
|
value: [0.6 0.75 0.56756757 0.63636364 0.6875 0.58823529
|
|
0.58333333 0.57142857 0.58823529 0.56666667]
|
|
|
|
mean value: 0.6139330363595069
|
|
|
|
key: train_jcc
|
|
value: [0.81090909 0.80797101 0.84074074 0.82078853 0.78057554 0.85347985
|
|
0.79422383 0.84758364 0.77224199 0.79569892]
|
|
|
|
mean value: 0.8124213157107957
|
|
|
|
MCC on Blind test: 0.41
|
|
|
|
Accuracy on Blind test: 0.71
|
|
|
|
Model_name: Gaussian NB
|
|
Model func: GaussianNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianNB())])
|
|
|
|
key: fit_time
|
|
value: [0.02878189 0.02667975 0.02662826 0.01523542 0.01521683 0.0152421
|
|
0.0153563 0.01535201 0.01521778 0.01515055]
|
|
|
|
mean value: 0.018886089324951172
|
|
|
|
key: score_time
|
|
value: [0.03291011 0.02099562 0.03268838 0.01280093 0.01283741 0.01280856
|
|
0.01284051 0.01288533 0.01283646 0.01261544]
|
|
|
|
mean value: 0.017621874809265137
|
|
|
|
key: test_mcc
|
|
value: [0.33729285 0.43929506 0.2806866 0.46727593 0.46727593 0.4094686
|
|
0.30311891 0.3797245 0.50336704 0.55610073]
|
|
|
|
mean value: 0.41436061455359924
|
|
|
|
key: train_mcc
|
|
value: [0.45317181 0.46509582 0.50159668 0.47524088 0.48588424 0.45539376
|
|
0.47504757 0.49866514 0.40551043 0.48952847]
|
|
|
|
mean value: 0.47051347840626895
|
|
|
|
key: test_accuracy
|
|
value: [0.68181818 0.71212121 0.63636364 0.72307692 0.72307692 0.70769231
|
|
0.66153846 0.67692308 0.75384615 0.76923077]
|
|
|
|
mean value: 0.7045687645687646
|
|
|
|
key: train_accuracy
|
|
value: [0.73594549 0.73253833 0.74957411 0.73469388 0.7414966 0.72619048
|
|
0.73639456 0.74829932 0.67176871 0.74319728]
|
|
|
|
mean value: 0.7320098737961965
|
|
|
|
key: test_fscore
|
|
value: [0.58823529 0.6984127 0.61290323 0.70967742 0.70967742 0.66666667
|
|
0.59259259 0.66666667 0.72413793 0.76190476]
|
|
|
|
mean value: 0.6730874675911644
|
|
|
|
key: train_fscore
|
|
value: [0.67230444 0.70321361 0.72420263 0.71217712 0.71641791 0.70018622
|
|
0.71028037 0.72284644 0.68820679 0.71775701]
|
|
|
|
mean value: 0.7067592539194687
|
|
|
|
key: test_precision
|
|
value: [0.65217391 0.62857143 0.55882353 0.62857143 0.62857143 0.63333333
|
|
0.59259259 0.58333333 0.7 0.68571429]
|
|
|
|
mean value: 0.6291685273143074
|
|
|
|
key: train_precision
|
|
value: [0.70353982 0.65957447 0.67482517 0.65646259 0.66666667 0.65051903
|
|
0.66202091 0.67482517 0.57258065 0.66666667]
|
|
|
|
mean value: 0.6587681141338156
|
|
|
|
key: test_recall
|
|
value: [0.53571429 0.78571429 0.67857143 0.81481481 0.81481481 0.7037037
|
|
0.59259259 0.77777778 0.75 0.85714286]
|
|
|
|
mean value: 0.7310846560846561
|
|
|
|
key: train_recall
|
|
value: [0.6437247 0.75303644 0.78137652 0.77822581 0.77419355 0.75806452
|
|
0.76612903 0.77822581 0.86234818 0.77732794]
|
|
|
|
mean value: 0.7672652474859606
|
|
|
|
key: test_roc_auc
|
|
value: [0.66259398 0.72180451 0.64191729 0.73635478 0.73635478 0.70711501
|
|
0.65155945 0.69152047 0.75337838 0.77992278]
|
|
|
|
mean value: 0.7082521431205642
|
|
|
|
key: train_roc_auc
|
|
value: [0.72333294 0.73534175 0.75392355 0.74058349 0.7459203 0.73050285
|
|
0.74041746 0.7523482 0.69803626 0.7479015 ]
|
|
|
|
mean value: 0.7368308297115846
|
|
|
|
key: test_jcc
|
|
value: [0.41666667 0.53658537 0.44186047 0.55 0.55 0.5
|
|
0.42105263 0.5 0.56756757 0.61538462]
|
|
|
|
mean value: 0.5099117312167735
|
|
|
|
key: train_jcc
|
|
value: [0.50636943 0.54227405 0.56764706 0.5530086 0.55813953 0.53868195
|
|
0.55072464 0.5659824 0.52463054 0.55976676]
|
|
|
|
mean value: 0.5467224965443144
|
|
|
|
MCC on Blind test: 0.37
|
|
|
|
Accuracy on Blind test: 0.69
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01567888 0.01561689 0.01552606 0.01563478 0.01553893 0.03531313
|
|
0.0243299 0.03120279 0.02710128 0.0156343 ]
|
|
|
|
mean value: 0.02115769386291504
|
|
|
|
key: score_time
|
|
value: [0.01278377 0.01253676 0.0127914 0.01272702 0.01949525 0.02350473
|
|
0.03267789 0.02122116 0.01283574 0.0127399 ]
|
|
|
|
mean value: 0.017331361770629883
|
|
|
|
key: test_mcc
|
|
value: [0.47048344 0.38620478 0.29782716 0.38272699 0.30483531 0.58420716
|
|
0.21241763 0.29397865 0.30984556 0.49323276]
|
|
|
|
mean value: 0.3735759435436844
|
|
|
|
key: train_mcc
|
|
value: [0.4580777 0.47465098 0.52657928 0.48954715 0.46610619 0.433021
|
|
0.46805419 0.49537942 0.46645464 0.45672856]
|
|
|
|
mean value: 0.4734599110307227
|
|
|
|
key: test_accuracy
|
|
value: [0.74242424 0.6969697 0.65151515 0.69230769 0.64615385 0.8
|
|
0.61538462 0.64615385 0.66153846 0.75384615]
|
|
|
|
mean value: 0.6906293706293707
|
|
|
|
key: train_accuracy
|
|
value: [0.73424191 0.74446337 0.76660988 0.75 0.73639456 0.72108844
|
|
0.73979592 0.75170068 0.73639456 0.73469388]
|
|
|
|
mean value: 0.7415383189050748
|
|
|
|
key: test_fscore
|
|
value: [0.69090909 0.65517241 0.61016949 0.65517241 0.62295082 0.74509804
|
|
0.54545455 0.61016949 0.60714286 0.69230769]
|
|
|
|
mean value: 0.6434546855339058
|
|
|
|
key: train_fscore
|
|
value: [0.69047619 0.69387755 0.73189824 0.70775348 0.69902913 0.67843137
|
|
0.69461078 0.71372549 0.69902913 0.68674699]
|
|
|
|
mean value: 0.6995578340936605
|
|
|
|
key: test_precision
|
|
value: [0.7037037 0.63333333 0.58064516 0.61290323 0.55882353 0.79166667
|
|
0.53571429 0.5625 0.60714286 0.75 ]
|
|
|
|
mean value: 0.6336432763069385
|
|
|
|
key: train_precision
|
|
value: [0.6770428 0.69958848 0.70833333 0.69803922 0.6741573 0.66030534
|
|
0.68774704 0.69465649 0.67164179 0.6812749 ]
|
|
|
|
mean value: 0.6852786690390443
|
|
|
|
key: test_recall
|
|
value: [0.67857143 0.67857143 0.64285714 0.7037037 0.7037037 0.7037037
|
|
0.55555556 0.66666667 0.60714286 0.64285714]
|
|
|
|
mean value: 0.6583333333333333
|
|
|
|
key: train_recall
|
|
value: [0.70445344 0.68825911 0.75708502 0.71774194 0.72580645 0.69758065
|
|
0.7016129 0.73387097 0.72874494 0.69230769]
|
|
|
|
mean value: 0.7147463105654956
|
|
|
|
key: test_roc_auc
|
|
value: [0.73402256 0.69454887 0.65037594 0.69395712 0.65448343 0.78606238
|
|
0.60672515 0.64912281 0.65492278 0.74034749]
|
|
|
|
mean value: 0.6864568515884305
|
|
|
|
key: train_roc_auc
|
|
value: [0.7301679 0.73677661 0.76530722 0.74563567 0.73496205 0.71790797
|
|
0.73462998 0.74928843 0.73534021 0.72885179]
|
|
|
|
mean value: 0.7378867830212332
|
|
|
|
key: test_jcc
|
|
value: [0.52777778 0.48717949 0.43902439 0.48717949 0.45238095 0.59375
|
|
0.375 0.43902439 0.43589744 0.52941176]
|
|
|
|
mean value: 0.47666256856088274
|
|
|
|
key: train_jcc
|
|
value: [0.52727273 0.53125 0.57716049 0.54769231 0.53731343 0.51335312
|
|
0.53211009 0.55487805 0.53731343 0.52293578]
|
|
|
|
mean value: 0.5381279430530961
|
|
|
|
MCC on Blind test: 0.38
|
|
|
|
Accuracy on Blind test: 0.7
|
|
|
|
Model_name: K-Nearest Neighbors
|
|
Model func: KNeighborsClassifier()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', KNeighborsClassifier())])
|
|
|
|
key: fit_time
|
|
value: [0.01461744 0.01566386 0.03456044 0.02335429 0.02419734 0.01425457
|
|
0.01445103 0.01474667 0.01445246 0.0294168 ]
|
|
|
|
mean value: 0.019971489906311035
|
|
|
|
key: score_time
|
|
value: [0.14002895 0.04413342 0.04691148 0.06827736 0.07164907 0.04104376
|
|
0.05115151 0.05609918 0.06565619 0.05760217]
|
|
|
|
mean value: 0.06425530910491943
|
|
|
|
key: test_mcc
|
|
value: [0.50003253 0.24906563 0.27139234 0.23200331 0.35227713 0.32075086
|
|
0.25477363 0.27546396 0.46197936 0.16566028]
|
|
|
|
mean value: 0.3083399044901759
|
|
|
|
key: train_mcc
|
|
value: [0.57465558 0.59579904 0.59637399 0.52614157 0.57178103 0.58332352
|
|
0.59003063 0.58587304 0.5757966 0.61931953]
|
|
|
|
mean value: 0.5819094520279187
|
|
|
|
key: test_accuracy
|
|
value: [0.75757576 0.63636364 0.63636364 0.63076923 0.69230769 0.67692308
|
|
0.64615385 0.64615385 0.73846154 0.6 ]
|
|
|
|
mean value: 0.6661072261072261
|
|
|
|
key: train_accuracy
|
|
value: [0.79386712 0.80408859 0.80408859 0.77040816 0.79251701 0.79761905
|
|
0.80102041 0.79931973 0.79421769 0.81462585]
|
|
|
|
mean value: 0.7971772184171795
|
|
|
|
key: test_fscore
|
|
value: [0.7037037 0.55555556 0.6 0.53846154 0.58333333 0.57142857
|
|
0.53061224 0.58181818 0.65306122 0.48 ]
|
|
|
|
mean value: 0.579797435368864
|
|
|
|
key: train_fscore
|
|
value: [0.74844075 0.76091476 0.7628866 0.71933472 0.74583333 0.75564682
|
|
0.75876289 0.75416667 0.75051546 0.77890467]
|
|
|
|
mean value: 0.7535406659706698
|
|
|
|
key: test_precision
|
|
value: [0.73076923 0.57692308 0.5625 0.56 0.66666667 0.63636364
|
|
0.59090909 0.57142857 0.76190476 0.54545455]
|
|
|
|
mean value: 0.620291958041958
|
|
|
|
key: train_precision
|
|
value: [0.76923077 0.78205128 0.77731092 0.74248927 0.77155172 0.76987448
|
|
0.77637131 0.78017241 0.76470588 0.7804878 ]
|
|
|
|
mean value: 0.7714245856204415
|
|
|
|
key: test_recall
|
|
value: [0.67857143 0.53571429 0.64285714 0.51851852 0.51851852 0.51851852
|
|
0.48148148 0.59259259 0.57142857 0.42857143]
|
|
|
|
mean value: 0.5486772486772487
|
|
|
|
key: train_recall
|
|
value: [0.72874494 0.74089069 0.74898785 0.69758065 0.72177419 0.74193548
|
|
0.74193548 0.72983871 0.73684211 0.77732794]
|
|
|
|
mean value: 0.7365858038396239
|
|
|
|
key: test_roc_auc
|
|
value: [0.74718045 0.6231203 0.63721805 0.61452242 0.667154 0.6539961
|
|
0.62231969 0.63840156 0.71814672 0.57915058]
|
|
|
|
mean value: 0.6501209856473015
|
|
|
|
key: train_roc_auc
|
|
value: [0.7849607 0.79544534 0.79655275 0.76055503 0.78294592 0.79008539
|
|
0.79302657 0.78991935 0.78630962 0.80948508]
|
|
|
|
mean value: 0.7889285755441003
|
|
|
|
key: test_jcc
|
|
value: [0.54285714 0.38461538 0.42857143 0.36842105 0.41176471 0.4
|
|
0.36111111 0.41025641 0.48484848 0.31578947]
|
|
|
|
mean value: 0.41082351944581047
|
|
|
|
key: train_jcc
|
|
value: [0.59800664 0.61409396 0.61666667 0.56168831 0.59468439 0.60726073
|
|
0.61129568 0.60535117 0.60066007 0.63787375]
|
|
|
|
mean value: 0.6047581365850571
|
|
|
|
MCC on Blind test: 0.17
|
|
|
|
Accuracy on Blind test: 0.61
|
|
|
|
Model_name: SVM
|
|
Model func: SVC(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SVC(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.04557109 0.09158087 0.04515958 0.0451467 0.0465467 0.0457809
|
|
0.04573226 0.04564881 0.04582 0.04577756]
|
|
|
|
mean value: 0.050276446342468264
|
|
|
|
key: score_time
|
|
value: [0.02106643 0.04136634 0.02172184 0.02107167 0.02126837 0.02165556
|
|
0.02115321 0.0212276 0.02165985 0.04119205]
|
|
|
|
mean value: 0.025338292121887207
|
|
|
|
key: test_mcc
|
|
value: [0.62551598 0.40813021 0.54353456 0.46460294 0.58628465 0.48616436
|
|
0.39510487 0.4094686 0.59484953 0.63172593]
|
|
|
|
mean value: 0.514538163979908
|
|
|
|
key: train_mcc
|
|
value: [0.60931738 0.60209476 0.6197787 0.61787739 0.59996505 0.61167436
|
|
0.61935879 0.61749631 0.65722353 0.62019793]
|
|
|
|
mean value: 0.6174984218803518
|
|
|
|
key: test_accuracy
|
|
value: [0.81818182 0.71212121 0.77272727 0.73846154 0.8 0.75384615
|
|
0.70769231 0.70769231 0.8 0.81538462]
|
|
|
|
mean value: 0.7626107226107226
|
|
|
|
key: train_accuracy
|
|
value: [0.8109029 0.80749574 0.81601363 0.81462585 0.80612245 0.81122449
|
|
0.81462585 0.81462585 0.83333333 0.81462585]
|
|
|
|
mean value: 0.8143595939227479
|
|
|
|
key: test_fscore
|
|
value: [0.77777778 0.65454545 0.74576271 0.69090909 0.75471698 0.68
|
|
0.64150943 0.66666667 0.73469388 0.75 ]
|
|
|
|
mean value: 0.7096581994408757
|
|
|
|
key: train_fscore
|
|
value: [0.76631579 0.76109937 0.77118644 0.77432712 0.7625 0.77300613
|
|
0.77890467 0.77244259 0.8 0.7806841 ]
|
|
|
|
mean value: 0.7740466211693395
|
|
|
|
key: test_precision
|
|
value: [0.80769231 0.66666667 0.70967742 0.67857143 0.76923077 0.73913043
|
|
0.65384615 0.63333333 0.85714286 0.9 ]
|
|
|
|
mean value: 0.7415291370620964
|
|
|
|
key: train_precision
|
|
value: [0.79824561 0.79646018 0.80888889 0.79574468 0.7887931 0.78423237
|
|
0.78367347 0.8008658 0.80658436 0.776 ]
|
|
|
|
mean value: 0.7939488461753169
|
|
|
|
key: test_recall
|
|
value: [0.75 0.64285714 0.78571429 0.7037037 0.74074074 0.62962963
|
|
0.62962963 0.7037037 0.64285714 0.64285714]
|
|
|
|
mean value: 0.6871693121693122
|
|
|
|
key: train_recall
|
|
value: [0.73684211 0.72874494 0.73684211 0.75403226 0.73790323 0.76209677
|
|
0.77419355 0.74596774 0.79352227 0.7854251 ]
|
|
|
|
mean value: 0.755557006660572
|
|
|
|
key: test_roc_auc
|
|
value: [0.80921053 0.70300752 0.77443609 0.7334308 0.791423 0.73586745
|
|
0.69639376 0.70711501 0.78088803 0.79440154]
|
|
|
|
mean value: 0.7526173730121098
|
|
|
|
key: train_roc_auc
|
|
value: [0.80077399 0.79672541 0.80518576 0.80642789 0.79689279 0.8045778
|
|
0.8091556 0.80533681 0.82784618 0.81060111]
|
|
|
|
mean value: 0.8063523346223337
|
|
|
|
key: test_jcc
|
|
value: [0.63636364 0.48648649 0.59459459 0.52777778 0.60606061 0.51515152
|
|
0.47222222 0.5 0.58064516 0.6 ]
|
|
|
|
mean value: 0.5519301999947162
|
|
|
|
key: train_jcc
|
|
value: [0.62116041 0.61433447 0.62758621 0.63175676 0.61616162 0.63
|
|
0.63787375 0.6292517 0.66666667 0.64026403]
|
|
|
|
mean value: 0.6315055608263402
|
|
|
|
MCC on Blind test: 0.25
|
|
|
|
Accuracy on Blind test: 0.65
|
|
|
|
Model_name: MLP
|
|
Model func: MLPClassifier(max_iter=500, random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MLPClassifier(max_iter=500, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [4.95288038 3.51090693 4.1410749 4.41169286 3.81011581 3.0505271
|
|
2.88554645 2.73471856 2.5703938 2.43770528]
|
|
|
|
mean value: 3.450556206703186
|
|
|
|
key: score_time
|
|
value: [0.02655077 0.02042603 0.02268815 0.0593164 0.02366924 0.01313233
|
|
0.01821995 0.01510763 0.02347803 0.01534796]
|
|
|
|
mean value: 0.023793649673461915
|
|
|
|
key: test_mcc
|
|
value: [0.6324803 0.53048978 0.58952142 0.61703008 0.71358864 0.45883147
|
|
0.37392718 0.46727593 0.52562245 0.65334379]
|
|
|
|
mean value: 0.556211102803363
|
|
|
|
key: train_mcc
|
|
value: [0.95824735 0.91253296 0.94830132 0.96189583 0.93787358 0.93377872
|
|
0.96176122 0.9724087 0.93758373 0.93476474]
|
|
|
|
mean value: 0.9459148150528274
|
|
|
|
key: test_accuracy
|
|
value: [0.81818182 0.77272727 0.77272727 0.81538462 0.86153846 0.73846154
|
|
0.69230769 0.72307692 0.76923077 0.83076923]
|
|
|
|
mean value: 0.7794405594405595
|
|
|
|
key: train_accuracy
|
|
value: [0.97955707 0.95741056 0.97444634 0.98129252 0.96938776 0.96768707
|
|
0.98129252 0.98639456 0.96938776 0.96768707]
|
|
|
|
mean value: 0.9734543221036285
|
|
|
|
key: test_fscore
|
|
value: [0.79310345 0.71698113 0.7761194 0.76923077 0.82352941 0.67924528
|
|
0.64285714 0.70967742 0.71698113 0.79245283]
|
|
|
|
mean value: 0.7420177971826885
|
|
|
|
key: train_fscore
|
|
value: [0.97590361 0.9490835 0.97017893 0.97804391 0.96428571 0.96114519
|
|
0.97795591 0.98406375 0.964 0.96237624]
|
|
|
|
mean value: 0.9687036759156895
|
|
|
|
key: test_precision
|
|
value: [0.76666667 0.76 0.66666667 0.8 0.875 0.69230769
|
|
0.62068966 0.62857143 0.76 0.84 ]
|
|
|
|
mean value: 0.7409902109384868
|
|
|
|
key: train_precision
|
|
value: [0.96812749 0.95491803 0.953125 0.96837945 0.94921875 0.97510373
|
|
0.97211155 0.97244094 0.95256917 0.94186047]
|
|
|
|
mean value: 0.960785458765038
|
|
|
|
key: test_recall
|
|
value: [0.82142857 0.67857143 0.92857143 0.74074074 0.77777778 0.66666667
|
|
0.66666667 0.81481481 0.67857143 0.75 ]
|
|
|
|
mean value: 0.7523809523809524
|
|
|
|
key: train_recall
|
|
value: [0.98380567 0.94331984 0.98785425 0.98790323 0.97983871 0.94758065
|
|
0.98387097 0.99596774 0.9757085 0.98380567]
|
|
|
|
mean value: 0.9769655217448087
|
|
|
|
key: test_roc_auc
|
|
value: [0.81860902 0.76033835 0.79323308 0.8045809 0.8494152 0.72807018
|
|
0.68859649 0.73635478 0.75820463 0.82094595]
|
|
|
|
mean value: 0.7758348574138048
|
|
|
|
key: train_roc_auc
|
|
value: [0.98013813 0.95548345 0.97628007 0.98218691 0.97080171 0.96496679
|
|
0.98164137 0.98768975 0.97025894 0.9699087 ]
|
|
|
|
mean value: 0.9739355812967617
|
|
|
|
key: test_jcc
|
|
value: [0.65714286 0.55882353 0.63414634 0.625 0.7 0.51428571
|
|
0.47368421 0.55 0.55882353 0.65625 ]
|
|
|
|
mean value: 0.5928156182241832
|
|
|
|
key: train_jcc
|
|
value: [0.95294118 0.90310078 0.94208494 0.95703125 0.93103448 0.92519685
|
|
0.95686275 0.96862745 0.93050193 0.92748092]
|
|
|
|
mean value: 0.9394862519512547
|
|
|
|
MCC on Blind test: 0.35
|
|
|
|
Accuracy on Blind test: 0.69
|
|
|
|
Model_name: Decision Tree
|
|
Model func: DecisionTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', DecisionTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.05443668 0.03855729 0.03228474 0.03359437 0.03423095 0.03301144
|
|
0.03654695 0.03661156 0.03370905 0.0323894 ]
|
|
|
|
mean value: 0.03653724193572998
|
|
|
|
key: score_time
|
|
value: [0.00968671 0.00915575 0.00899315 0.00887203 0.00888515 0.00890875
|
|
0.00891566 0.00896859 0.00897646 0.00886536]
|
|
|
|
mean value: 0.009022760391235351
|
|
|
|
key: test_mcc
|
|
value: [0.63944497 0.7518797 0.62551598 0.81355733 0.81355733 0.65001125
|
|
0.55485797 0.78521625 0.78106513 0.81177606]
|
|
|
|
mean value: 0.7226881968918047
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.81818182 0.87878788 0.81818182 0.90769231 0.90769231 0.83076923
|
|
0.76923077 0.89230769 0.89230769 0.90769231]
|
|
|
|
mean value: 0.8622843822843823
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.8 0.85714286 0.77777778 0.89285714 0.89285714 0.79245283
|
|
0.75409836 0.87719298 0.86792453 0.89285714]
|
|
|
|
mean value: 0.8405160765094508
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.75 0.85714286 0.80769231 0.86206897 0.86206897 0.80769231
|
|
0.67647059 0.83333333 0.92 0.89285714]
|
|
|
|
mean value: 0.8269326467987725
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.85714286 0.85714286 0.75 0.92592593 0.92592593 0.77777778
|
|
0.85185185 0.92592593 0.82142857 0.89285714]
|
|
|
|
mean value: 0.8585978835978836
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.82330827 0.87593985 0.80921053 0.91033138 0.91033138 0.82309942
|
|
0.78118908 0.89717349 0.88368726 0.90588803]
|
|
|
|
mean value: 0.8620158692527113
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.66666667 0.75 0.63636364 0.80645161 0.80645161 0.65625
|
|
0.60526316 0.78125 0.76666667 0.80645161]
|
|
|
|
mean value: 0.7281814966301384
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.62
|
|
|
|
Accuracy on Blind test: 0.82
|
|
|
|
Model_name: Extra Trees
|
|
Model func: ExtraTreesClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreesClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.14534354 0.14375496 0.14363265 0.14585638 0.1456449 0.14597726
|
|
0.14634418 0.14502072 0.14271498 0.14374256]
|
|
|
|
mean value: 0.14480321407318114
|
|
|
|
key: score_time
|
|
value: [0.01830816 0.01829052 0.01826406 0.01852727 0.01860476 0.01825523
|
|
0.01866341 0.01826715 0.01823401 0.0181725 ]
|
|
|
|
mean value: 0.018358707427978516
|
|
|
|
key: test_mcc
|
|
value: [0.49825151 0.50003253 0.55182541 0.52764927 0.65001125 0.45883147
|
|
0.42087369 0.45537869 0.52841444 0.32910919]
|
|
|
|
mean value: 0.49203774418938706
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.75757576 0.75757576 0.77272727 0.76923077 0.83076923 0.73846154
|
|
0.72307692 0.72307692 0.76923077 0.67692308]
|
|
|
|
mean value: 0.7518648018648019
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.69230769 0.7037037 0.75409836 0.72727273 0.79245283 0.67924528
|
|
0.64 0.7 0.69387755 0.57142857]
|
|
|
|
mean value: 0.6954386719596388
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.75 0.73076923 0.6969697 0.71428571 0.80769231 0.69230769
|
|
0.69565217 0.63636364 0.80952381 0.66666667]
|
|
|
|
mean value: 0.7200230928491798
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.64285714 0.67857143 0.82142857 0.74074074 0.77777778 0.66666667
|
|
0.59259259 0.77777778 0.60714286 0.5 ]
|
|
|
|
mean value: 0.6805555555555556
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.7424812 0.74718045 0.77913534 0.76510721 0.82309942 0.72807018
|
|
0.70419103 0.73099415 0.74951737 0.65540541]
|
|
|
|
mean value: 0.7425181760708076
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.52941176 0.54285714 0.60526316 0.57142857 0.65625 0.51428571
|
|
0.47058824 0.53846154 0.53125 0.4 ]
|
|
|
|
mean value: 0.5359796124927704
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.32
|
|
|
|
Accuracy on Blind test: 0.68
|
|
|
|
Model_name: Extra Tree
|
|
Model func: ExtraTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01078057 0.01063585 0.01078033 0.01067924 0.01064467 0.01073027
|
|
0.01065564 0.01075292 0.01105666 0.01065898]
|
|
|
|
mean value: 0.01073751449584961
|
|
|
|
key: score_time
|
|
value: [0.00893903 0.00913692 0.00885129 0.00883389 0.00882244 0.00884271
|
|
0.00880599 0.00892782 0.00925374 0.00890636]
|
|
|
|
mean value: 0.008932018280029297
|
|
|
|
key: test_mcc
|
|
value: [0.35925401 0.30706198 0.38620478 0.43673149 0.43673149 0.51951641
|
|
0.1062616 0.4094686 0.43532819 0.16566028]
|
|
|
|
mean value: 0.3562218818704287
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.68181818 0.66666667 0.6969697 0.72307692 0.72307692 0.76923077
|
|
0.58461538 0.70769231 0.72307692 0.6 ]
|
|
|
|
mean value: 0.6876223776223777
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.6440678 0.57692308 0.65517241 0.67857143 0.67857143 0.70588235
|
|
0.4 0.66666667 0.67857143 0.48 ]
|
|
|
|
mean value: 0.6164426592648479
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.61290323 0.625 0.63333333 0.65517241 0.65517241 0.75
|
|
0.5 0.63333333 0.67857143 0.54545455]
|
|
|
|
mean value: 0.6288940694085299
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.67857143 0.53571429 0.67857143 0.7037037 0.7037037 0.66666667
|
|
0.33333333 0.7037037 0.67857143 0.42857143]
|
|
|
|
mean value: 0.6111111111111112
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.68139098 0.64943609 0.69454887 0.7202729 0.7202729 0.75438596
|
|
0.54824561 0.70711501 0.71766409 0.57915058]
|
|
|
|
mean value: 0.6772483009325114
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.475 0.40540541 0.48717949 0.51351351 0.51351351 0.54545455
|
|
0.25 0.5 0.51351351 0.31578947]
|
|
|
|
mean value: 0.4519369452264189
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.18
|
|
|
|
Accuracy on Blind test: 0.6
|
|
|
|
Model_name: Random Forest
|
|
Model func: RandomForestClassifier(n_estimators=1000, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(n_estimators=1000, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [2.13737011 2.13456511 2.13529062 2.63254333 2.35393476 2.41857886
|
|
2.32477784 2.3284719 2.36776686 2.40691328]
|
|
|
|
mean value: 2.32402126789093
|
|
|
|
key: score_time
|
|
value: [0.09437299 0.0945363 0.09483504 0.13788605 0.10371614 0.10218167
|
|
0.09945846 0.09545922 0.10537124 0.10566664]
|
|
|
|
mean value: 0.10334837436676025
|
|
|
|
key: test_mcc
|
|
value: [0.7518797 0.62781955 0.75561806 0.71373784 0.74537234 0.75776097
|
|
0.61988304 0.68794872 0.75074384 0.78106513]
|
|
|
|
mean value: 0.7191829180551123
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.87878788 0.81818182 0.87878788 0.86153846 0.87692308 0.87692308
|
|
0.81538462 0.84615385 0.87692308 0.89230769]
|
|
|
|
mean value: 0.8621911421911422
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.85714286 0.78571429 0.86206897 0.83018868 0.84615385 0.86206897
|
|
0.77777778 0.82142857 0.84615385 0.86792453]
|
|
|
|
mean value: 0.8356622322952837
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.85714286 0.78571429 0.83333333 0.84615385 0.88 0.80645161
|
|
0.77777778 0.79310345 0.91666667 0.92 ]
|
|
|
|
mean value: 0.8416343827967855
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.85714286 0.78571429 0.89285714 0.81481481 0.81481481 0.92592593
|
|
0.77777778 0.85185185 0.78571429 0.82142857]
|
|
|
|
mean value: 0.8328042328042328
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.87593985 0.81390977 0.8806391 0.85477583 0.86793372 0.88401559
|
|
0.80994152 0.84697856 0.86583012 0.88368726]
|
|
|
|
mean value: 0.8583651320493425
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.75 0.64705882 0.75757576 0.70967742 0.73333333 0.75757576
|
|
0.63636364 0.6969697 0.73333333 0.76666667]
|
|
|
|
mean value: 0.7188554424702432
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.6
|
|
|
|
Accuracy on Blind test: 0.8
|
|
|
|
Model_name: Random Forest2
|
|
Model func: RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000...05', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10,
|
|
oob_score=True, random_state=42))])
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
|
|
key: fit_time
|
|
value: [2.21234727 1.30542231 1.38805318 1.30887151 1.37292385 1.53155303
|
|
1.21596074 1.03473687 1.36110282 1.65195084]
|
|
|
|
mean value: 1.4382922410964967
|
|
|
|
key: score_time
|
|
value: [0.20063782 0.21798038 0.15705633 0.16267562 0.17024279 0.13860297
|
|
0.22972798 0.19579649 0.26243567 0.21305084]
|
|
|
|
mean value: 0.19482069015502929
|
|
|
|
key: test_mcc
|
|
value: [0.7518797 0.62781955 0.78428398 0.71373784 0.84119102 0.65374193
|
|
0.58628465 0.75075302 0.75074384 0.75074384]
|
|
|
|
mean value: 0.7211179376451361
|
|
|
|
key: train_mcc
|
|
value: [0.92695091 0.90963269 0.90584436 0.90625171 0.91302428 0.91631879
|
|
0.91658976 0.93026565 0.92346816 0.91334592]
|
|
|
|
mean value: 0.916169223087544
|
|
|
|
key: test_accuracy
|
|
value: [0.87878788 0.81818182 0.89393939 0.86153846 0.92307692 0.83076923
|
|
0.8 0.87692308 0.87692308 0.87692308]
|
|
|
|
mean value: 0.8637062937062937
|
|
|
|
key: train_accuracy
|
|
value: [0.96422487 0.95570698 0.95400341 0.95408163 0.95748299 0.95918367
|
|
0.95918367 0.96598639 0.96258503 0.95748299]
|
|
|
|
mean value: 0.9589921658612337
|
|
|
|
key: test_fscore
|
|
value: [0.85714286 0.78571429 0.87719298 0.83018868 0.90566038 0.8
|
|
0.75471698 0.85714286 0.84615385 0.84615385]
|
|
|
|
mean value: 0.8360066712499682
|
|
|
|
key: train_fscore
|
|
value: [0.95791583 0.948 0.94567404 0.94610778 0.9498998 0.9516129
|
|
0.952 0.95967742 0.95582329 0.9500998 ]
|
|
|
|
mean value: 0.9516810876111794
|
|
|
|
key: test_precision
|
|
value: [0.85714286 0.78571429 0.86206897 0.84615385 0.92307692 0.78571429
|
|
0.76923077 0.82758621 0.91666667 0.91666667]
|
|
|
|
mean value: 0.8490021472780094
|
|
|
|
key: train_precision
|
|
value: [0.9484127 0.93675889 0.94 0.93675889 0.94422311 0.9516129
|
|
0.94444444 0.95967742 0.94820717 0.93700787]
|
|
|
|
mean value: 0.9447103404899263
|
|
|
|
key: test_recall
|
|
value: [0.85714286 0.78571429 0.89285714 0.81481481 0.88888889 0.81481481
|
|
0.74074074 0.88888889 0.78571429 0.78571429]
|
|
|
|
mean value: 0.8255291005291006
|
|
|
|
key: train_recall
|
|
value: [0.96761134 0.95951417 0.951417 0.95564516 0.95564516 0.9516129
|
|
0.95967742 0.95967742 0.96356275 0.96356275]
|
|
|
|
mean value: 0.9587926080710462
|
|
|
|
key: test_roc_auc
|
|
value: [0.87593985 0.81390977 0.89379699 0.85477583 0.91812865 0.82846004
|
|
0.791423 0.87865497 0.86583012 0.86583012]
|
|
|
|
mean value: 0.858674934332829
|
|
|
|
key: train_roc_auc
|
|
value: [0.96468802 0.95622767 0.95364968 0.95429317 0.95723435 0.95815939
|
|
0.95925047 0.96513283 0.96271979 0.95832097]
|
|
|
|
mean value: 0.958967634033798
|
|
|
|
key: test_jcc
|
|
value: [0.75 0.64705882 0.78125 0.70967742 0.82758621 0.66666667
|
|
0.60606061 0.75 0.73333333 0.73333333]
|
|
|
|
mean value: 0.7204966389174742
|
|
|
|
key: train_jcc
|
|
value: [0.91923077 0.90114068 0.89694656 0.89772727 0.90458015 0.90769231
|
|
0.90839695 0.92248062 0.91538462 0.90494297]
|
|
|
|
mean value: 0.9078522899502255
|
|
|
|
MCC on Blind test: 0.62
|
|
|
|
Accuracy on Blind test: 0.82
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.02466083 0.01058507 0.01057553 0.01058054 0.01059246 0.01055098
|
|
0.01051736 0.01058578 0.0105772 0.01056647]
|
|
|
|
mean value: 0.011979222297668457
|
|
|
|
key: score_time
|
|
value: [0.00913358 0.00899863 0.00914288 0.00892329 0.00889254 0.00897717
|
|
0.00898194 0.0089457 0.00892258 0.00897408]
|
|
|
|
mean value: 0.008989238739013672
|
|
|
|
key: test_mcc
|
|
value: [0.47048344 0.38620478 0.29782716 0.38272699 0.30483531 0.58420716
|
|
0.21241763 0.29397865 0.30984556 0.49323276]
|
|
|
|
mean value: 0.3735759435436844
|
|
|
|
key: train_mcc
|
|
value: [0.4580777 0.47465098 0.52657928 0.48954715 0.46610619 0.433021
|
|
0.46805419 0.49537942 0.46645464 0.45672856]
|
|
|
|
mean value: 0.4734599110307227
|
|
|
|
key: test_accuracy
|
|
value: [0.74242424 0.6969697 0.65151515 0.69230769 0.64615385 0.8
|
|
0.61538462 0.64615385 0.66153846 0.75384615]
|
|
|
|
mean value: 0.6906293706293707
|
|
|
|
key: train_accuracy
|
|
value: [0.73424191 0.74446337 0.76660988 0.75 0.73639456 0.72108844
|
|
0.73979592 0.75170068 0.73639456 0.73469388]
|
|
|
|
mean value: 0.7415383189050748
|
|
|
|
key: test_fscore
|
|
value: [0.69090909 0.65517241 0.61016949 0.65517241 0.62295082 0.74509804
|
|
0.54545455 0.61016949 0.60714286 0.69230769]
|
|
|
|
mean value: 0.6434546855339058
|
|
|
|
key: train_fscore
|
|
value: [0.69047619 0.69387755 0.73189824 0.70775348 0.69902913 0.67843137
|
|
0.69461078 0.71372549 0.69902913 0.68674699]
|
|
|
|
mean value: 0.6995578340936605
|
|
|
|
key: test_precision
|
|
value: [0.7037037 0.63333333 0.58064516 0.61290323 0.55882353 0.79166667
|
|
0.53571429 0.5625 0.60714286 0.75 ]
|
|
|
|
mean value: 0.6336432763069385
|
|
|
|
key: train_precision
|
|
value: [0.6770428 0.69958848 0.70833333 0.69803922 0.6741573 0.66030534
|
|
0.68774704 0.69465649 0.67164179 0.6812749 ]
|
|
|
|
mean value: 0.6852786690390443
|
|
|
|
key: test_recall
|
|
value: [0.67857143 0.67857143 0.64285714 0.7037037 0.7037037 0.7037037
|
|
0.55555556 0.66666667 0.60714286 0.64285714]
|
|
|
|
mean value: 0.6583333333333333
|
|
|
|
key: train_recall
|
|
value: [0.70445344 0.68825911 0.75708502 0.71774194 0.72580645 0.69758065
|
|
0.7016129 0.73387097 0.72874494 0.69230769]
|
|
|
|
mean value: 0.7147463105654956
|
|
|
|
key: test_roc_auc
|
|
value: [0.73402256 0.69454887 0.65037594 0.69395712 0.65448343 0.78606238
|
|
0.60672515 0.64912281 0.65492278 0.74034749]
|
|
|
|
mean value: 0.6864568515884305
|
|
|
|
key: train_roc_auc
|
|
value: [0.7301679 0.73677661 0.76530722 0.74563567 0.73496205 0.71790797
|
|
0.73462998 0.74928843 0.73534021 0.72885179]
|
|
|
|
mean value: 0.7378867830212332
|
|
|
|
key: test_jcc
|
|
value: [0.52777778 0.48717949 0.43902439 0.48717949 0.45238095 0.59375
|
|
0.375 0.43902439 0.43589744 0.52941176]
|
|
|
|
mean value: 0.47666256856088274
|
|
|
|
key: train_jcc
|
|
value: [0.52727273 0.53125 0.57716049 0.54769231 0.53731343 0.51335312
|
|
0.53211009 0.55487805 0.53731343 0.52293578]
|
|
|
|
mean value: 0.5381279430530961
|
|
|
|
MCC on Blind test: 0.38
|
|
|
|
Accuracy on Blind test: 0.7
|
|
|
|
Model_name: XGBoost
|
|
Model func: XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000...
|
|
interaction_constraints=None, learning_rate=None,
|
|
max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan,
|
|
monotone_constraints=None, n_estimators=100,
|
|
n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None,
|
|
reg_lambda=None, scale_pos_weight=None,
|
|
subsample=None, tree_method=None,
|
|
use_label_encoder=False,
|
|
validate_parameters=None, verbosity=0))])
|
|
|
|
key: fit_time
|
|
value: [2.54061818 8.41783857 7.24396205 7.64712381 7.72478628 7.23917007
|
|
4.18405128 5.06533813 8.42402911 7.15204573]
|
|
|
|
mean value: 6.563896322250367
|
|
|
|
key: score_time
|
|
value: [0.01274681 0.02059245 0.0194366 0.02421999 0.02902532 0.0225451
|
|
0.01344848 0.0194695 0.01913404 0.03150439]
|
|
|
|
mean value: 0.021212267875671386
|
|
|
|
key: test_mcc
|
|
value: [0.7518797 0.71989635 0.7518797 0.84288091 0.93664717 0.81355733
|
|
0.75776097 0.75075302 0.81512161 0.90592724]
|
|
|
|
mean value: 0.8046304004759343
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.87878788 0.86363636 0.87878788 0.92307692 0.96923077 0.90769231
|
|
0.87692308 0.87692308 0.90769231 0.95384615]
|
|
|
|
mean value: 0.9036596736596737
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.85714286 0.83636364 0.85714286 0.90909091 0.96296296 0.89285714
|
|
0.86206897 0.85714286 0.88461538 0.94545455]
|
|
|
|
mean value: 0.8864842118290395
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.85714286 0.85185185 0.85714286 0.89285714 0.96296296 0.86206897
|
|
0.80645161 0.82758621 0.95833333 0.96296296]
|
|
|
|
mean value: 0.8839360753570987
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.85714286 0.82142857 0.85714286 0.92592593 0.96296296 0.92592593
|
|
0.92592593 0.88888889 0.82142857 0.92857143]
|
|
|
|
mean value: 0.8915343915343915
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.87593985 0.85808271 0.87593985 0.92348928 0.96832359 0.91033138
|
|
0.88401559 0.87865497 0.89720077 0.9507722 ]
|
|
|
|
mean value: 0.9022750193802825
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.75 0.71875 0.75 0.83333333 0.92857143 0.80645161
|
|
0.75757576 0.75 0.79310345 0.89655172]
|
|
|
|
mean value: 0.7984337304797539
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.7
|
|
|
|
Accuracy on Blind test: 0.85
|
|
|
|
Model_name: LDA
|
|
Model func: LinearDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LinearDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.14518332 0.10405898 0.13379717 0.17119074 0.15411258 0.12464881
|
|
0.12550688 0.11781502 0.13254738 0.1455822 ]
|
|
|
|
mean value: 0.1354443073272705
|
|
|
|
key: score_time
|
|
value: [0.04804635 0.02783465 0.05546546 0.01987624 0.03345323 0.0386436
|
|
0.03872681 0.04116654 0.03594565 0.04259109]
|
|
|
|
mean value: 0.03817496299743652
|
|
|
|
key: test_mcc
|
|
value: [0.45541866 0.66074521 0.62357561 0.5623401 0.71373784 0.49953579
|
|
0.48131798 0.60621087 0.40673378 0.7277197 ]
|
|
|
|
mean value: 0.5737335541739753
|
|
|
|
key: train_mcc
|
|
value: [0.79117437 0.80146354 0.78219233 0.78179133 0.76790583 0.78835244
|
|
0.7757455 0.77484858 0.7778972 0.76911078]
|
|
|
|
mean value: 0.7810481887271579
|
|
|
|
key: test_accuracy
|
|
value: [0.72727273 0.83333333 0.8030303 0.78461538 0.86153846 0.75384615
|
|
0.73846154 0.8 0.70769231 0.86153846]
|
|
|
|
mean value: 0.7871328671328671
|
|
|
|
key: train_accuracy
|
|
value: [0.89778535 0.90289608 0.89267462 0.89285714 0.88605442 0.8962585
|
|
0.88945578 0.88945578 0.89115646 0.88605442]
|
|
|
|
mean value: 0.8924648564706974
|
|
|
|
key: test_fscore
|
|
value: [0.7 0.80701754 0.79365079 0.75 0.83018868 0.71428571
|
|
0.71186441 0.77966102 0.66666667 0.81632653]
|
|
|
|
mean value: 0.7569661352049165
|
|
|
|
key: train_fscore
|
|
value: [0.88 0.88577154 0.87573964 0.87524752 0.86732673 0.87872763
|
|
0.87229862 0.87128713 0.87250996 0.86836935]
|
|
|
|
mean value: 0.8747278144973756
|
|
|
|
key: test_precision
|
|
value: [0.65625 0.79310345 0.71428571 0.72413793 0.84615385 0.68965517
|
|
0.65625 0.71875 0.65517241 0.95238095]
|
|
|
|
mean value: 0.7406139478337754
|
|
|
|
key: train_precision
|
|
value: [0.86956522 0.87698413 0.85384615 0.85992218 0.85214008 0.86666667
|
|
0.85057471 0.85603113 0.85882353 0.84351145]
|
|
|
|
mean value: 0.8588065242539382
|
|
|
|
key: test_recall
|
|
value: [0.75 0.82142857 0.89285714 0.77777778 0.81481481 0.74074074
|
|
0.77777778 0.85185185 0.67857143 0.71428571]
|
|
|
|
mean value: 0.782010582010582
|
|
|
|
key: train_recall
|
|
value: [0.89068826 0.89473684 0.89878543 0.89112903 0.88306452 0.89112903
|
|
0.89516129 0.88709677 0.88663968 0.89473684]
|
|
|
|
mean value: 0.8913167689695703
|
|
|
|
key: test_roc_auc
|
|
value: [0.73026316 0.83176692 0.81484962 0.78362573 0.85477583 0.75194932
|
|
0.74415205 0.80750487 0.70415058 0.84362934]
|
|
|
|
mean value: 0.7866667419298998
|
|
|
|
key: train_roc_auc
|
|
value: [0.89681472 0.90178019 0.89351036 0.89262334 0.88564991 0.89556452
|
|
0.8902277 0.88913662 0.89053391 0.88725112]
|
|
|
|
mean value: 0.8923092383747282
|
|
|
|
key: test_jcc
|
|
value: [0.53846154 0.67647059 0.65789474 0.6 0.70967742 0.55555556
|
|
0.55263158 0.63888889 0.5 0.68965517]
|
|
|
|
mean value: 0.6119235478699383
|
|
|
|
key: train_jcc
|
|
value: [0.78571429 0.79496403 0.77894737 0.77816901 0.76573427 0.78368794
|
|
0.77351916 0.77192982 0.77385159 0.76736111]
|
|
|
|
mean value: 0.7773878595535089
|
|
|
|
MCC on Blind test: 0.36
|
|
|
|
Accuracy on Blind test: 0.69
|
|
|
|
Model_name: Multinomial
|
|
Model func: MultinomialNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MultinomialNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01819944 0.01511383 0.0151732 0.01494551 0.01508546 0.01504469
|
|
0.01502967 0.0351162 0.03492212 0.02697062]
|
|
|
|
mean value: 0.020560073852539062
|
|
|
|
key: score_time
|
|
value: [0.01248837 0.0126307 0.01266456 0.01248789 0.01264739 0.01254988
|
|
0.01258659 0.0241158 0.02403378 0.01960278]
|
|
|
|
mean value: 0.01558077335357666
|
|
|
|
key: test_mcc
|
|
value: [0.45541866 0.38620478 0.46471292 0.55485797 0.44523265 0.46460294
|
|
0.21241763 0.38272699 0.6219883 0.58948312]
|
|
|
|
mean value: 0.4577645953980349
|
|
|
|
key: train_mcc
|
|
value: [0.46958286 0.49425543 0.53041672 0.50161181 0.50453361 0.51452364
|
|
0.47842327 0.50302721 0.49256013 0.51795013]
|
|
|
|
mean value: 0.5006884805465163
|
|
|
|
key: test_accuracy
|
|
value: [0.72727273 0.6969697 0.72727273 0.76923077 0.72307692 0.73846154
|
|
0.61538462 0.69230769 0.81538462 0.8 ]
|
|
|
|
mean value: 0.7305361305361305
|
|
|
|
key: train_accuracy
|
|
value: [0.73935264 0.75127768 0.76831346 0.75510204 0.75510204 0.76020408
|
|
0.74319728 0.75510204 0.75 0.76190476]
|
|
|
|
mean value: 0.7539556026840037
|
|
|
|
key: test_fscore
|
|
value: [0.7 0.65517241 0.70967742 0.75409836 0.68965517 0.69090909
|
|
0.54545455 0.65517241 0.76923077 0.75471698]
|
|
|
|
mean value: 0.6924087166737057
|
|
|
|
key: train_fscore
|
|
value: [0.69822485 0.71259843 0.734375 0.71653543 0.72093023 0.72621359
|
|
0.70450098 0.71875 0.71232877 0.72762646]
|
|
|
|
mean value: 0.7172083739870709
|
|
|
|
key: test_precision
|
|
value: [0.65625 0.63333333 0.64705882 0.67647059 0.64516129 0.67857143
|
|
0.53571429 0.61290323 0.83333333 0.8 ]
|
|
|
|
mean value: 0.6718796308846119
|
|
|
|
key: train_precision
|
|
value: [0.68076923 0.69348659 0.70943396 0.7 0.69402985 0.70037453
|
|
0.68441065 0.6969697 0.68939394 0.70037453]
|
|
|
|
mean value: 0.6949242980239846
|
|
|
|
key: test_recall
|
|
value: [0.75 0.67857143 0.78571429 0.85185185 0.74074074 0.7037037
|
|
0.55555556 0.7037037 0.71428571 0.71428571]
|
|
|
|
mean value: 0.7198412698412698
|
|
|
|
key: train_recall
|
|
value: [0.71659919 0.73279352 0.7611336 0.73387097 0.75 0.75403226
|
|
0.72580645 0.74193548 0.73684211 0.75708502]
|
|
|
|
mean value: 0.741009860258587
|
|
|
|
key: test_roc_auc
|
|
value: [0.73026316 0.69454887 0.73496241 0.78118908 0.72563353 0.7334308
|
|
0.60672515 0.69395712 0.8030888 0.78957529]
|
|
|
|
mean value: 0.7293374201268938
|
|
|
|
key: train_roc_auc
|
|
value: [0.73624077 0.7487497 0.76733151 0.7522296 0.75441176 0.75936907
|
|
0.7408444 0.75332068 0.74818645 0.76124046]
|
|
|
|
mean value: 0.7521924409107321
|
|
|
|
key: test_jcc
|
|
value: [0.53846154 0.48717949 0.55 0.60526316 0.52631579 0.52777778
|
|
0.375 0.48717949 0.625 0.60606061]
|
|
|
|
mean value: 0.5328237844027318
|
|
|
|
key: train_jcc
|
|
value: [0.53636364 0.55351682 0.58024691 0.55828221 0.56363636 0.57012195
|
|
0.54380665 0.56097561 0.55319149 0.57186544]
|
|
|
|
mean value: 0.5592007082029138
|
|
|
|
MCC on Blind test: 0.34
|
|
|
|
Accuracy on Blind test: 0.68
|
|
|
|
Model_name: Passive Aggresive
|
|
Model func: PassiveAggressiveClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
PassiveAggressiveClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02960873 0.0522666 0.05202031 0.03943491 0.04185295 0.05404019
|
|
0.04617739 0.05087376 0.06957674 0.06584883]
|
|
|
|
mean value: 0.05017004013061523
|
|
|
|
key: score_time
|
|
value: [0.02536869 0.02496433 0.0193212 0.02704597 0.0292387 0.02346063
|
|
0.02069235 0.02265573 0.02009249 0.04243779]
|
|
|
|
mean value: 0.02552778720855713
|
|
|
|
key: test_mcc
|
|
value: [0.63641182 0.49051147 0.68984962 0.44164062 0.62514441 0.5846598
|
|
0.38801412 0.41804565 0.53454691 0.44305019]
|
|
|
|
mean value: 0.5251874629997096
|
|
|
|
key: train_mcc
|
|
value: [0.70443937 0.69796362 0.69088835 0.50316884 0.64044328 0.69041337
|
|
0.5932234 0.72942757 0.55175432 0.65853976]
|
|
|
|
mean value: 0.6460261881355452
|
|
|
|
key: test_accuracy
|
|
value: [0.8030303 0.74242424 0.84848485 0.63076923 0.81538462 0.8
|
|
0.70769231 0.69230769 0.72307692 0.70769231]
|
|
|
|
mean value: 0.747086247086247
|
|
|
|
key: train_accuracy
|
|
value: [0.83645656 0.84497445 0.85008518 0.68197279 0.82142857 0.84863946
|
|
0.79591837 0.86394558 0.71598639 0.79931973]
|
|
|
|
mean value: 0.8058727068340112
|
|
|
|
key: test_fscore
|
|
value: [0.8 0.72131148 0.82142857 0.69230769 0.78571429 0.73469388
|
|
0.55813953 0.6875 0.75 0.70769231]
|
|
|
|
mean value: 0.7258787744987434
|
|
|
|
key: train_fscore
|
|
value: [0.83275261 0.83116883 0.81666667 0.72540382 0.79768786 0.80353201
|
|
0.7044335 0.84790875 0.7473525 0.80528053]
|
|
|
|
mean value: 0.7912187066147196
|
|
|
|
key: test_precision
|
|
value: [0.7027027 0.66666667 0.82142857 0.52941176 0.75862069 0.81818182
|
|
0.75 0.59459459 0.61363636 0.62162162]
|
|
|
|
mean value: 0.6876864793193393
|
|
|
|
key: train_precision
|
|
value: [0.73088685 0.76712329 0.84120172 0.5704388 0.76383764 0.88780488
|
|
0.90506329 0.80215827 0.59661836 0.67966574]
|
|
|
|
mean value: 0.754479883023373
|
|
|
|
key: test_recall
|
|
value: [0.92857143 0.78571429 0.82142857 1. 0.81481481 0.66666667
|
|
0.44444444 0.81481481 0.96428571 0.82142857]
|
|
|
|
mean value: 0.8062169312169312
|
|
|
|
key: train_recall
|
|
value: [0.96761134 0.90688259 0.79352227 0.99596774 0.83467742 0.73387097
|
|
0.5766129 0.89919355 1. 0.98785425]
|
|
|
|
mean value: 0.8696193025989291
|
|
|
|
key: test_roc_auc
|
|
value: [0.81954887 0.7481203 0.84492481 0.68421053 0.81530214 0.78070175
|
|
0.66959064 0.71003899 0.75241313 0.7215251 ]
|
|
|
|
mean value: 0.7546376263481527
|
|
|
|
key: train_roc_auc
|
|
value: [0.8543939 0.8534413 0.84234937 0.72445446 0.82322106 0.83311195
|
|
0.76624763 0.86871442 0.75513196 0.82530542]
|
|
|
|
mean value: 0.8146371482806617
|
|
|
|
key: test_jcc
|
|
value: [0.66666667 0.56410256 0.6969697 0.52941176 0.64705882 0.58064516
|
|
0.38709677 0.52380952 0.6 0.54761905]
|
|
|
|
mean value: 0.5743380022886664
|
|
|
|
key: train_jcc
|
|
value: [0.71343284 0.71111111 0.69014085 0.56912442 0.66346154 0.67158672
|
|
0.54372624 0.7359736 0.59661836 0.67403315]
|
|
|
|
mean value: 0.6569208810054634
|
|
|
|
MCC on Blind test: 0.24
|
|
|
|
Accuracy on Blind test: 0.64
|
|
|
|
Model_name: Stochastic GDescent
|
|
Model func: SGDClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SGDClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.03120399 0.06726694 0.06336164 0.05971932 0.05662465 0.06744671
|
|
0.02830267 0.02712274 0.04990125 0.05505729]
|
|
|
|
mean value: 0.050600719451904294
|
|
|
|
key: score_time
|
|
value: [0.02665305 0.04292178 0.01279736 0.03229451 0.02208591 0.01279974
|
|
0.01282239 0.01391006 0.02769732 0.01272583]
|
|
|
|
mean value: 0.02167079448699951
|
|
|
|
key: test_mcc
|
|
value: [0.65688806 0.61278195 0.63944497 0.59252515 0.56195149 0.55652057
|
|
0.47209321 0.4816727 0.49614829 0.55761053]
|
|
|
|
mean value: 0.5627636919806256
|
|
|
|
key: train_mcc
|
|
value: [0.6968363 0.65832425 0.74427977 0.75701865 0.56152034 0.65879988
|
|
0.76290323 0.55409335 0.66098966 0.71904589]
|
|
|
|
mean value: 0.6773811332062949
|
|
|
|
key: test_accuracy
|
|
value: [0.83333333 0.78787879 0.81818182 0.78461538 0.72307692 0.78461538
|
|
0.73846154 0.66153846 0.72307692 0.78461538]
|
|
|
|
mean value: 0.7639393939393939
|
|
|
|
key: train_accuracy
|
|
value: [0.85178876 0.80068143 0.87563884 0.87755102 0.72789116 0.82823129
|
|
0.88435374 0.7244898 0.80102041 0.84863946]
|
|
|
|
mean value: 0.8220285899709117
|
|
|
|
key: test_fscore
|
|
value: [0.79245283 0.78787879 0.8 0.77419355 0.75 0.69565217
|
|
0.70175439 0.71052632 0.73529412 0.73076923]
|
|
|
|
mean value: 0.7478521390538283
|
|
|
|
key: train_fscore
|
|
value: [0.80709534 0.80597015 0.85071575 0.86311787 0.75460123 0.75894988
|
|
0.86290323 0.75153374 0.80661157 0.84135472]
|
|
|
|
mean value: 0.8102853479833604
|
|
|
|
key: test_precision
|
|
value: [0.84 0.68421053 0.75 0.68571429 0.6 0.84210526
|
|
0.66666667 0.55102041 0.625 0.79166667]
|
|
|
|
mean value: 0.7036383816684568
|
|
|
|
key: train_precision
|
|
value: [0.89215686 0.68258427 0.85950413 0.81654676 0.60891089 0.92982456
|
|
0.86290323 0.60643564 0.68156425 0.75159236]
|
|
|
|
mean value: 0.7692022951590732
|
|
|
|
key: test_recall
|
|
value: [0.75 0.92857143 0.85714286 0.88888889 1. 0.59259259
|
|
0.74074074 1. 0.89285714 0.67857143]
|
|
|
|
mean value: 0.832936507936508
|
|
|
|
key: train_recall
|
|
value: [0.73684211 0.98380567 0.84210526 0.91532258 0.99193548 0.64112903
|
|
0.86290323 0.98790323 0.98785425 0.95546559]
|
|
|
|
mean value: 0.8905266422881024
|
|
|
|
key: test_roc_auc
|
|
value: [0.82236842 0.80639098 0.82330827 0.7997076 0.76315789 0.75682261
|
|
0.73879142 0.71052632 0.74372587 0.77171815]
|
|
|
|
mean value: 0.7736517532570164
|
|
|
|
key: train_roc_auc
|
|
value: [0.83606811 0.82572636 0.87105263 0.88266129 0.7636148 0.80291746
|
|
0.88145161 0.76012808 0.8267717 0.86336329]
|
|
|
|
mean value: 0.8313755343577824
|
|
|
|
key: test_jcc
|
|
value: [0.65625 0.65 0.66666667 0.63157895 0.6 0.53333333
|
|
0.54054054 0.55102041 0.58139535 0.57575758]
|
|
|
|
mean value: 0.5986542820667012
|
|
|
|
key: train_jcc
|
|
value: [0.67657993 0.675 0.74021352 0.75919732 0.60591133 0.61153846
|
|
0.75886525 0.6019656 0.67590028 0.72615385]
|
|
|
|
mean value: 0.6831325538139378
|
|
|
|
MCC on Blind test: 0.4
|
|
|
|
Accuracy on Blind test: 0.71
|
|
|
|
Model_name: AdaBoost Classifier
|
|
Model func: AdaBoostClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', AdaBoostClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.32416153 0.39294767 0.2976408 0.28441954 0.27947497 0.28033257
|
|
0.27952886 0.38015819 0.32259655 0.3303647 ]
|
|
|
|
mean value: 0.3171625375747681
|
|
|
|
key: score_time
|
|
value: [0.02708912 0.04722285 0.04034209 0.02221489 0.02183366 0.02164221
|
|
0.02185822 0.0218327 0.03394938 0.02196288]
|
|
|
|
mean value: 0.027994799613952636
|
|
|
|
key: test_mcc
|
|
value: [0.78428398 0.75099943 0.81718693 0.84784086 0.84288091 0.78521625
|
|
0.73110376 0.75075302 0.8449258 0.75074384]
|
|
|
|
mean value: 0.7905934792551548
|
|
|
|
key: train_mcc
|
|
value: [0.93709216 0.95127236 0.94771566 0.90963083 0.94442545 0.93391154
|
|
0.94442545 0.93026565 0.9406126 0.93726262]
|
|
|
|
mean value: 0.9376614341252668
|
|
|
|
key: test_accuracy
|
|
value: [0.89393939 0.87878788 0.90909091 0.92307692 0.92307692 0.89230769
|
|
0.86153846 0.87692308 0.92307692 0.87692308]
|
|
|
|
mean value: 0.8958741258741258
|
|
|
|
key: train_accuracy
|
|
value: [0.9693356 0.97614991 0.97444634 0.95578231 0.97278912 0.96768707
|
|
0.97278912 0.96598639 0.97108844 0.96938776]
|
|
|
|
mean value: 0.9695442060981121
|
|
|
|
key: test_fscore
|
|
value: [0.87719298 0.85185185 0.89655172 0.9122807 0.90909091 0.87719298
|
|
0.84745763 0.85714286 0.90566038 0.84615385]
|
|
|
|
mean value: 0.8780575859521196
|
|
|
|
key: train_fscore
|
|
value: [0.96356275 0.97188755 0.96981891 0.948 0.968 0.96192385
|
|
0.968 0.95967742 0.96537678 0.96370968]
|
|
|
|
mean value: 0.9639956943265103
|
|
|
|
key: test_precision
|
|
value: [0.86206897 0.88461538 0.86666667 0.86666667 0.89285714 0.83333333
|
|
0.78125 0.82758621 0.96 0.91666667]
|
|
|
|
mean value: 0.8691711033219653
|
|
|
|
key: train_precision
|
|
value: [0.96356275 0.96414343 0.964 0.94047619 0.96031746 0.9561753
|
|
0.96031746 0.95967742 0.97131148 0.95983936]
|
|
|
|
mean value: 0.9599820841441544
|
|
|
|
key: test_recall
|
|
value: [0.89285714 0.82142857 0.92857143 0.96296296 0.92592593 0.92592593
|
|
0.92592593 0.88888889 0.85714286 0.78571429]
|
|
|
|
mean value: 0.8915343915343915
|
|
|
|
key: train_recall
|
|
value: [0.96356275 0.97975709 0.9757085 0.95564516 0.97580645 0.96774194
|
|
0.97580645 0.95967742 0.95951417 0.96761134]
|
|
|
|
mean value: 0.9680831265508685
|
|
|
|
key: test_roc_auc
|
|
value: [0.89379699 0.8712406 0.91165414 0.9288499 0.92348928 0.89717349
|
|
0.8708577 0.87865497 0.91505792 0.86583012]
|
|
|
|
mean value: 0.8956605101341943
|
|
|
|
key: train_roc_auc
|
|
value: [0.96854608 0.97664325 0.97461896 0.95576376 0.97319734 0.9676945
|
|
0.97319734 0.96513283 0.96949316 0.96914291]
|
|
|
|
mean value: 0.9693430123007922
|
|
|
|
key: test_jcc
|
|
value: [0.78125 0.74193548 0.8125 0.83870968 0.83333333 0.78125
|
|
0.73529412 0.75 0.82758621 0.73333333]
|
|
|
|
mean value: 0.78351921525006
|
|
|
|
key: train_jcc
|
|
value: [0.9296875 0.9453125 0.94140625 0.90114068 0.9379845 0.92664093
|
|
0.9379845 0.92248062 0.93307087 0.92996109]
|
|
|
|
mean value: 0.930566942909057
|
|
|
|
MCC on Blind test: 0.57
|
|
|
|
Accuracy on Blind test: 0.79
|
|
|
|
Model_name: Bagging Classifier
|
|
Model func: BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
BaggingClassifier(n_jobs=10, oob_score=True,
|
|
random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.08892488 0.12267137 0.10880947 0.10996723 0.11298609 0.10330486
|
|
0.11086416 0.09907532 0.10347366 0.09717202]
|
|
|
|
mean value: 0.10572490692138672
|
|
|
|
key: score_time
|
|
value: [0.02251887 0.02324319 0.03003764 0.0321846 0.03387618 0.02600455
|
|
0.0283339 0.03716874 0.07822585 0.02539444]
|
|
|
|
mean value: 0.03369879722595215
|
|
|
|
key: test_mcc
|
|
value: [0.68825771 0.75099943 0.78766655 0.84288091 0.87636164 0.78521625
|
|
0.73110376 0.82026663 0.75074384 0.78106513]
|
|
|
|
mean value: 0.7814561847630195
|
|
|
|
key: train_mcc
|
|
value: [0.97203376 0.98255071 0.96522234 0.9790807 0.98957702 0.96535892
|
|
0.98954016 0.97560652 0.98607727 0.96159228]
|
|
|
|
mean value: 0.976663968108682
|
|
|
|
key: test_accuracy
|
|
value: [0.84848485 0.87878788 0.89393939 0.92307692 0.93846154 0.89230769
|
|
0.86153846 0.90769231 0.87692308 0.89230769]
|
|
|
|
mean value: 0.8913519813519813
|
|
|
|
key: train_accuracy
|
|
value: [0.98637138 0.99148211 0.98296422 0.98979592 0.99489796 0.9829932
|
|
0.99489796 0.98809524 0.99319728 0.98129252]
|
|
|
|
mean value: 0.9885987785233343
|
|
|
|
key: test_fscore
|
|
value: [0.81481481 0.85185185 0.8627451 0.90909091 0.92857143 0.87719298
|
|
0.84745763 0.89655172 0.84615385 0.86792453]
|
|
|
|
mean value: 0.8702354810536668
|
|
|
|
key: train_fscore
|
|
value: [0.98373984 0.9898167 0.97991968 0.98785425 0.99391481 0.9795082
|
|
0.99393939 0.98580122 0.99193548 0.97759674]
|
|
|
|
mean value: 0.9864026307953017
|
|
|
|
key: test_precision
|
|
value: [0.84615385 0.88461538 0.95652174 0.89285714 0.89655172 0.83333333
|
|
0.78125 0.83870968 0.91666667 0.92 ]
|
|
|
|
mean value: 0.8766659514314095
|
|
|
|
key: train_precision
|
|
value: [0.9877551 0.99590164 0.97211155 0.99186992 1. 0.99583333
|
|
0.99595142 0.99183673 0.98795181 0.98360656]
|
|
|
|
mean value: 0.990281806350635
|
|
|
|
key: test_recall
|
|
value: [0.78571429 0.82142857 0.78571429 0.92592593 0.96296296 0.92592593
|
|
0.92592593 0.96296296 0.78571429 0.82142857]
|
|
|
|
mean value: 0.8703703703703703
|
|
|
|
key: train_recall
|
|
value: [0.97975709 0.98380567 0.98785425 0.98387097 0.98790323 0.96370968
|
|
0.99193548 0.97983871 0.99595142 0.97165992]
|
|
|
|
mean value: 0.9826286404597101
|
|
|
|
key: test_roc_auc
|
|
value: [0.84022556 0.8712406 0.87969925 0.92348928 0.9420078 0.89717349
|
|
0.8708577 0.91569201 0.86583012 0.88368726]
|
|
|
|
mean value: 0.8889903060955693
|
|
|
|
key: train_roc_auc
|
|
value: [0.98546678 0.99043225 0.98363301 0.98899431 0.99395161 0.98038425
|
|
0.99449715 0.98697818 0.99357688 0.97996486]
|
|
|
|
mean value: 0.9877879272679783
|
|
|
|
key: test_jcc
|
|
value: [0.6875 0.74193548 0.75862069 0.83333333 0.86666667 0.78125
|
|
0.73529412 0.8125 0.73333333 0.76666667]
|
|
|
|
mean value: 0.7717100291173199
|
|
|
|
key: train_jcc
|
|
value: [0.968 0.97983871 0.96062992 0.976 0.98790323 0.95983936
|
|
0.98795181 0.972 0.984 0.9561753 ]
|
|
|
|
mean value: 0.9732338320207129
|
|
|
|
MCC on Blind test: 0.67
|
|
|
|
Accuracy on Blind test: 0.84
|
|
|
|
Model_name: Gaussian Process
|
|
Model func: GaussianProcessClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianProcessClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.34178162 0.28192282 0.3023963 0.30140734 0.2258141 0.21084929
|
|
0.2419467 0.19913411 0.22127748 0.30673599]
|
|
|
|
mean value: 0.2633265733718872
|
|
|
|
key: score_time
|
|
value: [0.02943897 0.02411103 0.01959991 0.02834296 0.01877308 0.02840662
|
|
0.02246118 0.01665163 0.02171493 0.02903819]
|
|
|
|
mean value: 0.02385385036468506
|
|
|
|
key: test_mcc
|
|
value: [0.49858823 0.24906563 0.32463591 0.33137828 0.39013493 0.48579325
|
|
0.31739853 0.35642509 0.49709581 0.39554427]
|
|
|
|
mean value: 0.3846059937585927
|
|
|
|
key: train_mcc
|
|
value: [0.9476116 0.9476116 0.94055988 0.96167286 0.94102929 0.95138438
|
|
0.9583433 0.95822824 0.94767646 0.94793075]
|
|
|
|
mean value: 0.9502048351347485
|
|
|
|
key: test_accuracy
|
|
value: [0.75757576 0.63636364 0.66666667 0.67692308 0.70769231 0.75384615
|
|
0.67692308 0.67692308 0.75384615 0.70769231]
|
|
|
|
mean value: 0.7014452214452215
|
|
|
|
key: train_accuracy
|
|
value: [0.97444634 0.97444634 0.97103918 0.98129252 0.97108844 0.97619048
|
|
0.97959184 0.97959184 0.9744898 0.9744898 ]
|
|
|
|
mean value: 0.975666655077704
|
|
|
|
key: test_fscore
|
|
value: [0.68 0.55555556 0.62068966 0.60377358 0.62745098 0.66666667
|
|
0.55319149 0.6440678 0.66666667 0.6122449 ]
|
|
|
|
mean value: 0.6230307293290175
|
|
|
|
key: train_fscore
|
|
value: [0.96969697 0.96969697 0.96551724 0.9778672 0.96606786 0.972
|
|
0.976 0.97590361 0.96969697 0.96993988]
|
|
|
|
mean value: 0.9712386712178342
|
|
|
|
key: test_precision
|
|
value: [0.77272727 0.57692308 0.6 0.61538462 0.66666667 0.76190476
|
|
0.65 0.59375 0.8 0.71428571]
|
|
|
|
mean value: 0.6751642107892107
|
|
|
|
key: train_precision
|
|
value: [0.96774194 0.96774194 0.96747967 0.97590361 0.95652174 0.96428571
|
|
0.96825397 0.972 0.96774194 0.96031746]
|
|
|
|
mean value: 0.966798797769377
|
|
|
|
key: test_recall
|
|
value: [0.60714286 0.53571429 0.64285714 0.59259259 0.59259259 0.59259259
|
|
0.48148148 0.7037037 0.57142857 0.53571429]
|
|
|
|
mean value: 0.5855820105820105
|
|
|
|
key: train_recall
|
|
value: [0.97165992 0.97165992 0.96356275 0.97983871 0.97580645 0.97983871
|
|
0.98387097 0.97983871 0.97165992 0.97975709]
|
|
|
|
mean value: 0.9757493143528797
|
|
|
|
key: test_roc_auc
|
|
value: [0.73778195 0.6231203 0.66353383 0.66471735 0.69103314 0.73050682
|
|
0.64863548 0.68079922 0.73166023 0.68677606]
|
|
|
|
mean value: 0.6858564391459128
|
|
|
|
key: train_roc_auc
|
|
value: [0.97406525 0.97406525 0.97001667 0.98109583 0.97172676 0.97668406
|
|
0.98017078 0.97962524 0.97409975 0.97521579]
|
|
|
|
mean value: 0.9756765374593448
|
|
|
|
key: test_jcc
|
|
value: [0.51515152 0.38461538 0.45 0.43243243 0.45714286 0.5
|
|
0.38235294 0.475 0.5 0.44117647]
|
|
|
|
mean value: 0.45378716011068954
|
|
|
|
key: train_jcc
|
|
value: [0.94117647 0.94117647 0.93333333 0.95669291 0.93436293 0.94552529
|
|
0.953125 0.95294118 0.94117647 0.94163424]
|
|
|
|
mean value: 0.9441144302391319
|
|
|
|
MCC on Blind test: 0.21
|
|
|
|
Accuracy on Blind test: 0.63
|
|
|
|
Model_name: Gradient Boosting
|
|
Model func: GradientBoostingClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GradientBoostingClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.86870885 0.85635352 0.85638285 0.85381961 0.8468504 0.85592127
|
|
0.85544157 0.85547113 0.85530901 0.86230111]
|
|
|
|
mean value: 0.8566559314727783
|
|
|
|
key: score_time
|
|
value: [0.00951076 0.00944591 0.00969481 0.0093689 0.00949192 0.00937796
|
|
0.0094378 0.00957966 0.01024437 0.00959682]
|
|
|
|
mean value: 0.00957489013671875
|
|
|
|
key: test_mcc
|
|
value: [0.7225146 0.78224958 0.90695603 0.84288091 0.84297012 0.78521625
|
|
0.76761091 0.78521625 0.81512161 0.87949938]
|
|
|
|
mean value: 0.8130235643826347
|
|
|
|
key: train_mcc
|
|
value: [0.98252193 0.98602048 0.97562986 0.98609226 0.98256288 0.98256288
|
|
0.98955156 0.98606193 0.98604621 0.98954016]
|
|
|
|
mean value: 0.9846590137625983
|
|
|
|
key: test_accuracy
|
|
value: [0.86363636 0.89393939 0.95454545 0.92307692 0.92307692 0.89230769
|
|
0.87692308 0.89230769 0.90769231 0.93846154]
|
|
|
|
mean value: 0.9065967365967367
|
|
|
|
key: train_accuracy
|
|
value: [0.99148211 0.99318569 0.98807496 0.99319728 0.9914966 0.9914966
|
|
0.99489796 0.99319728 0.99319728 0.99489796]
|
|
|
|
mean value: 0.9925123712176523
|
|
|
|
key: test_fscore
|
|
value: [0.84210526 0.87272727 0.94545455 0.90909091 0.90196078 0.87719298
|
|
0.86666667 0.87719298 0.88461538 0.92307692]
|
|
|
|
mean value: 0.8900083714015602
|
|
|
|
key: train_fscore
|
|
value: [0.98985801 0.99190283 0.98591549 0.99196787 0.98989899 0.98989899
|
|
0.99396378 0.99190283 0.99186992 0.99393939]
|
|
|
|
mean value: 0.9911118119763007
|
|
|
|
key: test_precision
|
|
value: [0.82758621 0.88888889 0.96296296 0.89285714 0.95833333 0.83333333
|
|
0.78787879 0.83333333 0.95833333 1. ]
|
|
|
|
mean value: 0.8943507322817668
|
|
|
|
key: train_precision
|
|
value: [0.99186992 0.99190283 0.98 0.988 0.99190283 0.99190283
|
|
0.99196787 0.99593496 0.99591837 0.99193548]
|
|
|
|
mean value: 0.9911335102776923
|
|
|
|
key: test_recall
|
|
value: [0.85714286 0.85714286 0.92857143 0.92592593 0.85185185 0.92592593
|
|
0.96296296 0.92592593 0.82142857 0.85714286]
|
|
|
|
mean value: 0.8914021164021164
|
|
|
|
key: train_recall
|
|
value: [0.98785425 0.99190283 0.99190283 0.99596774 0.98790323 0.98790323
|
|
0.99596774 0.98790323 0.98785425 0.99595142]
|
|
|
|
mean value: 0.9911110748334857
|
|
|
|
key: test_roc_auc
|
|
value: [0.86278195 0.88909774 0.95112782 0.92348928 0.91276803 0.89717349
|
|
0.88937622 0.89717349 0.89720077 0.92857143]
|
|
|
|
mean value: 0.9048760226391805
|
|
|
|
key: train_roc_auc
|
|
value: [0.99098595 0.99301024 0.98859848 0.99357211 0.99101044 0.99101044
|
|
0.99504269 0.99248102 0.99246085 0.99504316]
|
|
|
|
mean value: 0.9923215370717369
|
|
|
|
key: test_jcc
|
|
value: [0.72727273 0.77419355 0.89655172 0.83333333 0.82142857 0.78125
|
|
0.76470588 0.78125 0.79310345 0.85714286]
|
|
|
|
mean value: 0.803023209233132
|
|
|
|
key: train_jcc
|
|
value: [0.97991968 0.98393574 0.97222222 0.98406375 0.98 0.98
|
|
0.988 0.98393574 0.98387097 0.98795181]
|
|
|
|
mean value: 0.9823899906871628
|
|
|
|
MCC on Blind test: 0.7
|
|
|
|
Accuracy on Blind test: 0.85
|
|
|
|
Model_name: QDA
|
|
Model func: QuadraticDiscriminantAnalysis()
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', QuadraticDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.03440356 0.04314494 0.04276586 0.04926753 0.04323602 0.04748821
|
|
0.05650806 0.07945347 0.04209399 0.04101968]
|
|
|
|
mean value: 0.04793813228607178
|
|
|
|
key: score_time
|
|
value: [0.01262403 0.01321626 0.01337767 0.02008343 0.01935434 0.02306819
|
|
0.02878451 0.01343203 0.01475668 0.01469183]
|
|
|
|
mean value: 0.017338895797729494
|
|
|
|
key: test_mcc
|
|
value: [ 0.17291118 0.2457578 0.12990055 0.07922783 0.32062511 0.16095202
|
|
0.15714099 0.19212549 0.16883586 -0.14572413]
|
|
|
|
mean value: 0.1481752681598879
|
|
|
|
key: train_mcc
|
|
value: [0.2822248 0.2849168 0.2849168 0.31395725 0.27434947 0.2905668
|
|
0.2984769 0.28252862 0.2842297 0.31539528]
|
|
|
|
mean value: 0.2911562429617798
|
|
|
|
key: test_accuracy
|
|
value: [0.53030303 0.5 0.46969697 0.47692308 0.56923077 0.47692308
|
|
0.49230769 0.49230769 0.50769231 0.4 ]
|
|
|
|
mean value: 0.49153846153846154
|
|
|
|
key: train_accuracy
|
|
value: [0.51959114 0.52129472 0.52129472 0.54081633 0.51530612 0.5255102
|
|
0.53061224 0.52040816 0.52040816 0.54081633]
|
|
|
|
mean value: 0.5256058130236763
|
|
|
|
key: test_fscore
|
|
value: [0.60759494 0.62921348 0.60674157 0.575 0.65 0.60465116
|
|
0.60240964 0.61176471 0.61904762 0.53012048]
|
|
|
|
mean value: 0.6036543601091233
|
|
|
|
key: train_fscore
|
|
value: [0.63659794 0.63741935 0.63741935 0.64751958 0.63508323 0.64
|
|
0.64248705 0.63753213 0.63659794 0.64659686]
|
|
|
|
mean value: 0.6397253433790993
|
|
|
|
key: test_precision
|
|
value: [0.47058824 0.45901639 0.44262295 0.43396226 0.49056604 0.44067797
|
|
0.44642857 0.44827586 0.46428571 0.4 ]
|
|
|
|
mean value: 0.44964239953281515
|
|
|
|
key: train_precision
|
|
value: [0.46691871 0.46780303 0.46780303 0.47876448 0.46529081 0.47058824
|
|
0.47328244 0.46792453 0.46691871 0.47775629]
|
|
|
|
mean value: 0.47030502678473124
|
|
|
|
key: test_recall
|
|
value: [0.85714286 1. 0.96428571 0.85185185 0.96296296 0.96296296
|
|
0.92592593 0.96296296 0.92857143 0.78571429]
|
|
|
|
mean value: 0.9202380952380952
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.57330827 0.56578947 0.53477444 0.53118908 0.62621832 0.54727096
|
|
0.55506823 0.56042885 0.55888031 0.4469112 ]
|
|
|
|
mean value: 0.5499839124839124
|
|
|
|
key: train_roc_auc
|
|
value: [0.58529412 0.58676471 0.58676471 0.60294118 0.58088235 0.58970588
|
|
0.59411765 0.58529412 0.58651026 0.60410557]
|
|
|
|
mean value: 0.5902380541659479
|
|
|
|
key: test_jcc
|
|
value: [0.43636364 0.45901639 0.43548387 0.40350877 0.48148148 0.43333333
|
|
0.43103448 0.44067797 0.44827586 0.36065574]
|
|
|
|
mean value: 0.43298315361528394
|
|
|
|
key: train_jcc
|
|
value: [0.46691871 0.46780303 0.46780303 0.47876448 0.46529081 0.47058824
|
|
0.47328244 0.46792453 0.46691871 0.47775629]
|
|
|
|
mean value: 0.47030502678473124
|
|
|
|
MCC on Blind test: 0.06
|
|
|
|
Accuracy on Blind test: 0.47
|
|
|
|
Model_name: Ridge Classifier
|
|
Model func: RidgeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02518988 0.01712132 0.04624987 0.05063629 0.05859733 0.03765702
|
|
0.03586745 0.04877877 0.04349351 0.01943111]
|
|
|
|
mean value: 0.038302254676818845
|
|
|
|
key: score_time
|
|
value: [0.0415504 0.01250839 0.01943564 0.03759766 0.0271709 0.03775263
|
|
0.01938915 0.01257777 0.02632809 0.02936435]
|
|
|
|
mean value: 0.026367497444152833
|
|
|
|
key: test_mcc
|
|
value: [0.60496141 0.65754312 0.59889727 0.5906956 0.74537234 0.52764927
|
|
0.54376443 0.54376443 0.49323276 0.69904099]
|
|
|
|
mean value: 0.6004921615016763
|
|
|
|
key: train_mcc
|
|
value: [0.74051106 0.74280727 0.76507854 0.75817252 0.75228002 0.74753143
|
|
0.74383026 0.73644504 0.75580553 0.73390716]
|
|
|
|
mean value: 0.747636882177808
|
|
|
|
key: test_accuracy
|
|
value: [0.8030303 0.83333333 0.78787879 0.8 0.87692308 0.76923077
|
|
0.76923077 0.76923077 0.75384615 0.84615385]
|
|
|
|
mean value: 0.8008857808857809
|
|
|
|
key: train_accuracy
|
|
value: [0.87223169 0.87393526 0.88415673 0.88095238 0.87755102 0.87585034
|
|
0.87414966 0.8707483 0.8792517 0.86904762]
|
|
|
|
mean value: 0.875787470013559
|
|
|
|
key: test_fscore
|
|
value: [0.77966102 0.8 0.78125 0.76363636 0.84615385 0.72727273
|
|
0.74576271 0.74576271 0.69230769 0.79166667]
|
|
|
|
mean value: 0.7673473736715262
|
|
|
|
key: train_fscore
|
|
value: [0.85207101 0.85258964 0.86614173 0.86220472 0.859375 0.85601578
|
|
0.85375494 0.84920635 0.86105675 0.84812623]
|
|
|
|
mean value: 0.8560542157264177
|
|
|
|
key: test_precision
|
|
value: [0.74193548 0.81481481 0.69444444 0.75 0.88 0.71428571
|
|
0.6875 0.6875 0.75 0.95 ]
|
|
|
|
mean value: 0.7670480457415941
|
|
|
|
key: train_precision
|
|
value: [0.83076923 0.83921569 0.84291188 0.84230769 0.83333333 0.83783784
|
|
0.8372093 0.8359375 0.83333333 0.82692308]
|
|
|
|
mean value: 0.8359778870499232
|
|
|
|
key: test_recall
|
|
value: [0.82142857 0.78571429 0.89285714 0.77777778 0.81481481 0.74074074
|
|
0.81481481 0.81481481 0.64285714 0.67857143]
|
|
|
|
mean value: 0.7784391534391535
|
|
|
|
key: train_recall
|
|
value: [0.87449393 0.86639676 0.89068826 0.88306452 0.88709677 0.875
|
|
0.87096774 0.86290323 0.89068826 0.87044534]
|
|
|
|
mean value: 0.8771744808671803
|
|
|
|
key: test_roc_auc
|
|
value: [0.80545113 0.82706767 0.80169173 0.79678363 0.86793372 0.76510721
|
|
0.77582846 0.77582846 0.74034749 0.8257722 ]
|
|
|
|
mean value: 0.7981811698916962
|
|
|
|
key: train_roc_auc
|
|
value: [0.87254108 0.87290426 0.88505001 0.88123814 0.8788425 0.87573529
|
|
0.87371917 0.86968691 0.880828 0.86924027]
|
|
|
|
mean value: 0.8759785635317444
|
|
|
|
key: test_jcc
|
|
value: [0.63888889 0.66666667 0.64102564 0.61764706 0.73333333 0.57142857
|
|
0.59459459 0.59459459 0.52941176 0.65517241]
|
|
|
|
mean value: 0.6242763527854805
|
|
|
|
key: train_jcc
|
|
value: [0.74226804 0.74305556 0.76388889 0.75778547 0.75342466 0.74827586
|
|
0.74482759 0.73793103 0.75601375 0.73630137]
|
|
|
|
mean value: 0.7483772208669934
|
|
|
|
MCC on Blind test: 0.36
|
|
|
|
Accuracy on Blind test: 0.7
|
|
|
|
Model_name: Ridge ClassifierCV
|
|
Model func: RidgeClassifierCV(cv=10)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifierCV(cv=10))])
|
|
|
|
key: fit_time
|
|
value: [0.45063591 0.46006775 0.47793436 0.49590611 0.43046927 0.50178194
|
|
0.40122962 0.44505405 0.47706127 0.37416697]
|
|
|
|
mean value: 0.45143072605133056
|
|
|
|
key: score_time
|
|
value: [0.01952505 0.01934576 0.0215435 0.02150774 0.01943278 0.01962686
|
|
0.01943898 0.0198617 0.01942134 0.01939178]
|
|
|
|
mean value: 0.01990954875946045
|
|
|
|
key: test_mcc
|
|
value: [0.60496141 0.5037594 0.66046615 0.5906956 0.71373784 0.52764927
|
|
0.54376443 0.54376443 0.49323276 0.69904099]
|
|
|
|
mean value: 0.5881072269958624
|
|
|
|
key: train_mcc
|
|
value: [0.74051106 0.63246193 0.7891392 0.75817252 0.78873408 0.74753143
|
|
0.74383026 0.73644504 0.75580553 0.73390716]
|
|
|
|
mean value: 0.7426538205431445
|
|
|
|
key: test_accuracy
|
|
value: [0.8030303 0.75757576 0.81818182 0.8 0.86153846 0.76923077
|
|
0.76923077 0.76923077 0.75384615 0.84615385]
|
|
|
|
mean value: 0.7948018648018649
|
|
|
|
key: train_accuracy
|
|
value: [0.87223169 0.82112436 0.89608177 0.88095238 0.8962585 0.87585034
|
|
0.87414966 0.8707483 0.8792517 0.86904762]
|
|
|
|
mean value: 0.8735696322822144
|
|
|
|
key: test_fscore
|
|
value: [0.77966102 0.71428571 0.8125 0.76363636 0.83018868 0.72727273
|
|
0.74576271 0.74576271 0.69230769 0.79166667]
|
|
|
|
mean value: 0.7603044284092414
|
|
|
|
key: train_fscore
|
|
value: /home/tanu/git/LSHTM_analysis/scripts/ml/./katg_cd_8020.py:115: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
baseline_CT.sort_values(by = ['test_mcc'], ascending = False, inplace = True)
|
|
/home/tanu/git/LSHTM_analysis/scripts/ml/./katg_cd_8020.py:118: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
baseline_BT.sort_values(by = ['bts_mcc'], ascending = False, inplace = True)
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[0.85207101 0.78615071 0.87968442 0.86220472 0.87920792 0.85601578
|
|
0.85375494 0.84920635 0.86105675 0.84812623]
|
|
|
|
mean value: 0.8527478835315443
|
|
|
|
key: test_precision
|
|
value: [0.74193548 0.71428571 0.72222222 0.75 0.84615385 0.71428571
|
|
0.6875 0.6875 0.75 0.95 ]
|
|
|
|
mean value: 0.7563882980818465
|
|
|
|
key: train_precision
|
|
value: [0.83076923 0.79098361 0.85769231 0.84230769 0.86381323 0.83783784
|
|
0.8372093 0.8359375 0.83333333 0.82692308]
|
|
|
|
mean value: 0.8356807117318422
|
|
|
|
key: test_recall
|
|
value: [0.82142857 0.71428571 0.92857143 0.77777778 0.81481481 0.74074074
|
|
0.81481481 0.81481481 0.64285714 0.67857143]
|
|
|
|
mean value: 0.7748677248677248
|
|
|
|
key: train_recall
|
|
value: [0.87449393 0.78137652 0.90283401 0.88306452 0.89516129 0.875
|
|
0.87096774 0.86290323 0.89068826 0.87044534]
|
|
|
|
mean value: 0.870693483087371
|
|
|
|
key: test_roc_auc
|
|
value: [0.80545113 0.7518797 0.83270677 0.79678363 0.85477583 0.76510721
|
|
0.77582846 0.77582846 0.74034749 0.8257722 ]
|
|
|
|
mean value: 0.7924480871849293
|
|
|
|
key: train_roc_auc
|
|
value: [0.87254108 0.81568826 0.89700524 0.88123814 0.89611006 0.87573529
|
|
0.87371917 0.86968691 0.880828 0.86924027]
|
|
|
|
mean value: 0.873179241112428
|
|
|
|
key: test_jcc
|
|
value: [0.63888889 0.55555556 0.68421053 0.61764706 0.70967742 0.57142857
|
|
0.59459459 0.59459459 0.52941176 0.65517241]
|
|
|
|
mean value: 0.6151181388055349
|
|
|
|
key: train_jcc
|
|
value: [0.74226804 0.64765101 0.78521127 0.75778547 0.7844523 0.74827586
|
|
0.74482759 0.73793103 0.75601375 0.73630137]
|
|
|
|
mean value: 0.7440717677828074
|
|
|
|
MCC on Blind test: 0.36
|
|
|
|
Accuracy on Blind test: 0.7
|
|
|
|
Model_name: Logistic Regression
|
|
Model func: LogisticRegression(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegression(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.04254222 0.04146504 0.04074645 0.04208279 0.05123281 0.09679556
|
|
0.04163599 0.04167914 0.04116368 0.05056643]
|
|
|
|
mean value: 0.04899101257324219
|
|
|
|
key: score_time
|
|
value: [0.01266217 0.01570272 0.01541567 0.01549411 0.02262878 0.01518226
|
|
0.01518393 0.01532078 0.01540565 0.01556921]
|
|
|
|
mean value: 0.01585652828216553
|
|
|
|
key: test_mcc
|
|
value: [0.63245553 0.63157895 0.56225353 0.69989647 0.65812266 0.60715823
|
|
0.5879298 0.50330696 0.76014254 0.84105681]
|
|
|
|
mean value: 0.648390147277257
|
|
|
|
key: train_mcc
|
|
value: [0.73067701 0.76383236 0.72533874 0.73349914 0.73403438 0.73656956
|
|
0.73748951 0.7279837 0.74098223 0.71090267]
|
|
|
|
mean value: 0.7341309300998875
|
|
|
|
key: test_accuracy
|
|
value: [0.81578947 0.81578947 0.77631579 0.84210526 0.82894737 0.80263158
|
|
0.78666667 0.74666667 0.88 0.92 ]
|
|
|
|
mean value: 0.8214912280701754
|
|
|
|
key: train_accuracy
|
|
value: [0.86470588 0.88088235 0.86176471 0.86617647 0.86617647 0.86764706
|
|
0.86784141 0.86343612 0.86930984 0.85462555]
|
|
|
|
mean value: 0.8662565863349745
|
|
|
|
key: test_fscore
|
|
value: [0.82051282 0.81578947 0.79518072 0.85714286 0.82666667 0.81012658
|
|
0.80487805 0.7654321 0.88311688 0.92307692]
|
|
|
|
mean value: 0.8301923076916328
|
|
|
|
key: train_fscore
|
|
value: [0.86857143 0.88510638 0.86647727 0.86981402 0.87055477 0.87142857
|
|
0.87252125 0.86733238 0.87411598 0.85917496]
|
|
|
|
mean value: 0.870509701726112
|
|
|
|
key: test_precision
|
|
value: [0.8 0.81578947 0.73333333 0.7826087 0.83783784 0.7804878
|
|
0.73333333 0.70454545 0.87179487 0.9 ]
|
|
|
|
mean value: 0.7959730805059264
|
|
|
|
key: train_precision
|
|
value: [0.84444444 0.85479452 0.83791209 0.84679666 0.84297521 0.84722222
|
|
0.84383562 0.84444444 0.84196185 0.83195592]
|
|
|
|
mean value: 0.8436342975728736
|
|
|
|
key: test_recall
|
|
value: [0.84210526 0.81578947 0.86842105 0.94736842 0.81578947 0.84210526
|
|
0.89189189 0.83783784 0.89473684 0.94736842]
|
|
|
|
mean value: 0.8703413940256045
|
|
|
|
key: train_recall
|
|
value: [0.89411765 0.91764706 0.89705882 0.89411765 0.9 0.89705882
|
|
0.90322581 0.8914956 0.90882353 0.88823529]
|
|
|
|
mean value: 0.8991780231154045
|
|
|
|
key: test_roc_auc
|
|
value: [0.81578947 0.81578947 0.77631579 0.84210526 0.82894737 0.80263158
|
|
0.78805121 0.74786629 0.87980085 0.91963016]
|
|
|
|
mean value: 0.8216927453769559
|
|
|
|
key: train_roc_auc
|
|
value: [0.86470588 0.88088235 0.86176471 0.86617647 0.86617647 0.86764706
|
|
0.86778937 0.86339486 0.86936778 0.85467483]
|
|
|
|
mean value: 0.8662579782646196
|
|
|
|
key: test_jcc
|
|
value: [0.69565217 0.68888889 0.66 0.75 0.70454545 0.68085106
|
|
0.67346939 0.62 0.79069767 0.85714286]
|
|
|
|
mean value: 0.7121247500493738
|
|
|
|
key: train_jcc
|
|
value: [0.76767677 0.79389313 0.76441103 0.76962025 0.77078086 0.7721519
|
|
0.77386935 0.76574307 0.77638191 0.75311721]
|
|
|
|
mean value: 0.7707645469650399
|
|
|
|
MCC on Blind test: 0.34
|
|
|
|
Accuracy on Blind test: 0.68
|
|
|
|
Model_name: Logistic RegressionCV
|
|
Model func: LogisticRegressionCV(random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegressionCV(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.18951631 1.26864481 1.17924285 1.51314807 1.58095288 1.18844247
|
|
1.34055877 1.3052268 1.57440996 1.71798635]
|
|
|
|
mean value: 1.3858129262924195
|
|
|
|
key: score_time
|
|
value: [0.01910329 0.01268053 0.0197413 0.0155704 0.02047348 0.01968908
|
|
0.01276493 0.01601505 0.01298952 0.01598024]
|
|
|
|
mean value: 0.01650078296661377
|
|
|
|
key: test_mcc
|
|
value: [0.65812266 0.68516016 0.52925612 0.71675803 0.68516016 0.65812266
|
|
0.52099657 0.68947215 0.7341428 0.84128135]
|
|
|
|
mean value: 0.671847267058401
|
|
|
|
key: train_mcc
|
|
value: [0.86524496 0.8051206 0.85082842 0.83365564 0.83622055 0.867831
|
|
0.85653396 0.82413609 0.7871265 0.84497093]
|
|
|
|
mean value: 0.8371668653248739
|
|
|
|
key: test_accuracy
|
|
value: [0.82894737 0.84210526 0.76315789 0.85526316 0.84210526 0.82894737
|
|
0.76 0.84 0.86666667 0.92 ]
|
|
|
|
mean value: 0.8347192982456141
|
|
|
|
key: train_accuracy
|
|
value: [0.93235294 0.90147059 0.925 0.91617647 0.91764706 0.93382353
|
|
0.92804699 0.91189427 0.8928047 0.92217327]
|
|
|
|
mean value: 0.9181389824652328
|
|
|
|
key: test_fscore
|
|
value: [0.82666667 0.83783784 0.775 0.86419753 0.83783784 0.83116883
|
|
0.76315789 0.85 0.87179487 0.91891892]
|
|
|
|
mean value: 0.8376580389826004
|
|
|
|
key: train_fscore
|
|
value: [0.93352601 0.90496454 0.92661871 0.91845494 0.91954023 0.93449782
|
|
0.92929293 0.9132948 0.89586305 0.92352092]
|
|
|
|
mean value: 0.9199573940988474
|
|
|
|
key: test_precision
|
|
value: [0.83783784 0.86111111 0.73809524 0.81395349 0.86111111 0.82051282
|
|
0.74358974 0.79069767 0.85 0.94444444]
|
|
|
|
mean value: 0.8261353469493005
|
|
|
|
key: train_precision
|
|
value: [0.91761364 0.8739726 0.90704225 0.89415042 0.8988764 0.92507205
|
|
0.91477273 0.9002849 0.86980609 0.90651558]
|
|
|
|
mean value: 0.9008106663532677
|
|
|
|
key: test_recall
|
|
value: [0.81578947 0.81578947 0.81578947 0.92105263 0.81578947 0.84210526
|
|
0.78378378 0.91891892 0.89473684 0.89473684]
|
|
|
|
mean value: 0.8518492176386914
|
|
|
|
key: train_recall
|
|
value: [0.95 0.93823529 0.94705882 0.94411765 0.94117647 0.94411765
|
|
0.94428152 0.92668622 0.92352941 0.94117647]
|
|
|
|
mean value: 0.9400379506641366
|
|
|
|
key: test_roc_auc
|
|
value: [0.82894737 0.84210526 0.76315789 0.85526316 0.84210526 0.82894737
|
|
0.76031294 0.84103841 0.86628734 0.92034139]
|
|
|
|
mean value: 0.8348506401137981
|
|
|
|
key: train_roc_auc
|
|
value: [0.93235294 0.90147059 0.925 0.91617647 0.91764706 0.93382353
|
|
0.92802312 0.91187252 0.89284975 0.92220114]
|
|
|
|
mean value: 0.9181417112299466
|
|
|
|
key: test_jcc
|
|
value: [0.70454545 0.72093023 0.63265306 0.76086957 0.72093023 0.71111111
|
|
0.61702128 0.73913043 0.77272727 0.85 ]
|
|
|
|
mean value: 0.7229918641320352
|
|
|
|
key: train_jcc
|
|
value: [0.87533875 0.82642487 0.86327078 0.84920635 0.85106383 0.87704918
|
|
0.86792453 0.84042553 0.81136951 0.85790885]
|
|
|
|
mean value: 0.8519982177100894
|
|
|
|
MCC on Blind test: 0.39
|
|
|
|
Accuracy on Blind test: 0.7
|
|
|
|
Model_name: Gaussian NB
|
|
Model func: GaussianNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01780772 0.01577997 0.01677299 0.01547742 0.01852775 0.01779866
|
|
0.01696563 0.01622939 0.01249361 0.01594472]
|
|
|
|
mean value: 0.016379785537719727
|
|
|
|
key: score_time
|
|
value: [0.01321912 0.01505661 0.01500583 0.01514411 0.01485133 0.01504803
|
|
0.01355648 0.01073337 0.01014113 0.00958538]
|
|
|
|
mean value: 0.013234138488769531
|
|
|
|
key: test_mcc
|
|
value: [0.34510572 0.33282012 0.42163702 0.50870557 0.4234049 0.61057165
|
|
0.55746481 0.30928855 0.63072008 0.38714641]
|
|
|
|
mean value: 0.4526864836698431
|
|
|
|
key: train_mcc
|
|
value: [0.48539408 0.45868273 0.54390282 0.50504481 0.51465777 0.51283942
|
|
0.49625193 0.52930047 0.49772662 0.51403568]
|
|
|
|
mean value: 0.5057836343203269
|
|
|
|
key: test_accuracy
|
|
value: [0.67105263 0.65789474 0.71052632 0.75 0.71052632 0.80263158
|
|
0.77333333 0.65333333 0.81333333 0.69333333]
|
|
|
|
mean value: 0.7235964912280701
|
|
|
|
key: train_accuracy
|
|
value: [0.74117647 0.72205882 0.77058824 0.75147059 0.75588235 0.75588235
|
|
0.74743025 0.76358297 0.74743025 0.75624082]
|
|
|
|
mean value: 0.7511743111341453
|
|
|
|
key: test_fscore
|
|
value: [0.64788732 0.70454545 0.71794872 0.77108434 0.725 0.81481481
|
|
0.79012346 0.66666667 0.80555556 0.70886076]
|
|
|
|
mean value: 0.7352487087108064
|
|
|
|
key: train_fscore
|
|
value: [0.7258567 0.75294118 0.78151261 0.76230661 0.76815642 0.76353276
|
|
0.75706215 0.77419355 0.75977654 0.76487252]
|
|
|
|
mean value: 0.7610211030692625
|
|
|
|
key: test_precision
|
|
value: [0.6969697 0.62 0.7 0.71111111 0.69047619 0.76744186
|
|
0.72727273 0.63414634 0.85294118 0.68292683]
|
|
|
|
mean value: 0.7083285933497138
|
|
|
|
key: train_precision
|
|
value: [0.77152318 0.67764706 0.7459893 0.73045822 0.73138298 0.74033149
|
|
0.73024523 0.74193548 0.72340426 0.73770492]
|
|
|
|
mean value: 0.7330622122735214
|
|
|
|
key: test_recall
|
|
value: [0.60526316 0.81578947 0.73684211 0.84210526 0.76315789 0.86842105
|
|
0.86486486 0.7027027 0.76315789 0.73684211]
|
|
|
|
mean value: 0.7699146514935988
|
|
|
|
key: train_recall
|
|
value: [0.68529412 0.84705882 0.82058824 0.79705882 0.80882353 0.78823529
|
|
0.78592375 0.80938416 0.8 0.79411765]
|
|
|
|
mean value: 0.7936484388476799
|
|
|
|
key: test_roc_auc
|
|
value: [0.67105263 0.65789474 0.71052632 0.75 0.71052632 0.80263158
|
|
0.7745377 0.65398293 0.81401138 0.69274538]
|
|
|
|
mean value: 0.7237908961593172
|
|
|
|
key: train_roc_auc
|
|
value: [0.74117647 0.72205882 0.77058824 0.75147059 0.75588235 0.75588235
|
|
0.74737364 0.76351561 0.74750733 0.75629636]
|
|
|
|
mean value: 0.7511751768155942
|
|
|
|
key: test_jcc
|
|
value: [0.47916667 0.54385965 0.56 0.62745098 0.56862745 0.6875
|
|
0.65306122 0.5 0.6744186 0.54901961]
|
|
|
|
mean value: 0.5843104184146118
|
|
|
|
key: train_jcc
|
|
value: [0.56968215 0.60377358 0.64137931 0.61590909 0.62358277 0.61751152
|
|
0.60909091 0.63157895 0.61261261 0.61926606]
|
|
|
|
mean value: 0.6144386949043872
|
|
|
|
MCC on Blind test: 0.38
|
|
|
|
Accuracy on Blind test: 0.7
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01229858 0.01182866 0.01095462 0.01104927 0.01118016 0.01124525
|
|
0.01126027 0.01133013 0.01135421 0.01177001]
|
|
|
|
mean value: 0.01142711639404297
|
|
|
|
key: score_time
|
|
value: [0.01016617 0.00916839 0.00915813 0.00924134 0.00927758 0.0093832
|
|
0.00935721 0.00934434 0.00941181 0.00970054]
|
|
|
|
mean value: 0.00942087173461914
|
|
|
|
key: test_mcc
|
|
value: [0.44876913 0.42163702 0.46046963 0.50870557 0.34806171 0.60547285
|
|
0.33740121 0.39112751 0.38647697 0.57437737]
|
|
|
|
mean value: 0.4482498964812848
|
|
|
|
key: train_mcc
|
|
value: [0.54391345 0.52757012 0.5421152 0.51043554 0.51499128 0.50786319
|
|
0.5198183 0.50289058 0.52633593 0.48814425]
|
|
|
|
mean value: 0.5184077820295827
|
|
|
|
key: test_accuracy
|
|
value: [0.72368421 0.71052632 0.72368421 0.75 0.67105263 0.80263158
|
|
0.66666667 0.69333333 0.69333333 0.78666667]
|
|
|
|
mean value: 0.7221578947368421
|
|
|
|
key: train_accuracy
|
|
value: [0.76911765 0.76176471 0.77058824 0.75441176 0.75588235 0.75294118
|
|
0.75917768 0.75036711 0.76211454 0.74302496]
|
|
|
|
mean value: 0.7579390170164982
|
|
|
|
key: test_fscore
|
|
value: [0.73417722 0.71794872 0.75294118 0.77108434 0.69879518 0.8
|
|
0.6835443 0.70886076 0.7012987 0.78378378]
|
|
|
|
mean value: 0.7352434176055093
|
|
|
|
key: train_fscore
|
|
value: [0.78463649 0.77562327 0.77714286 0.76379066 0.76880223 0.76338028
|
|
0.76836158 0.76190476 0.77183099 0.75386779]
|
|
|
|
mean value: 0.7689340910647905
|
|
|
|
key: test_precision
|
|
value: [0.70731707 0.7 0.68085106 0.71111111 0.64444444 0.81081081
|
|
0.64285714 0.66666667 0.69230769 0.80555556]
|
|
|
|
mean value: 0.7061921560753943
|
|
|
|
key: train_precision
|
|
value: [0.73521851 0.73298429 0.75555556 0.73569482 0.73015873 0.73243243
|
|
0.74114441 0.72922252 0.74054054 0.72237197]
|
|
|
|
mean value: 0.7355323785697852
|
|
|
|
key: test_recall
|
|
value: [0.76315789 0.73684211 0.84210526 0.84210526 0.76315789 0.78947368
|
|
0.72972973 0.75675676 0.71052632 0.76315789]
|
|
|
|
mean value: 0.769701280227596
|
|
|
|
key: train_recall
|
|
value: [0.84117647 0.82352941 0.8 0.79411765 0.81176471 0.79705882
|
|
0.79765396 0.79765396 0.80588235 0.78823529]
|
|
|
|
mean value: 0.8057072623770916
|
|
|
|
key: test_roc_auc
|
|
value: [0.72368421 0.71052632 0.72368421 0.75 0.67105263 0.80263158
|
|
0.66749644 0.69416785 0.693101 0.78698435]
|
|
|
|
mean value: 0.7223328591749645
|
|
|
|
key: train_roc_auc
|
|
value: [0.76911765 0.76176471 0.77058824 0.75441176 0.75588235 0.75294118
|
|
0.7591211 0.75029757 0.76217871 0.74309125]
|
|
|
|
mean value: 0.7579394514404002
|
|
|
|
key: test_jcc
|
|
value: [0.58 0.56 0.60377358 0.62745098 0.53703704 0.66666667
|
|
0.51923077 0.54901961 0.54 0.64444444]
|
|
|
|
mean value: 0.5827623090519872
|
|
|
|
key: train_jcc
|
|
value: [0.64559819 0.63348416 0.63551402 0.61784897 0.62443439 0.61731207
|
|
0.62385321 0.61538462 0.62844037 0.60496614]
|
|
|
|
mean value: 0.6246836141324488
|
|
|
|
MCC on Blind test: 0.4
|
|
|
|
Accuracy on Blind test: 0.71
|
|
|
|
Model_name: K-Nearest Neighbors
|
|
Model func: KNeighborsClassifier()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', KNeighborsClassifier())])
|
|
|
|
key: fit_time
|
|
value: [0.01459193 0.01216364 0.01134157 0.01200795 0.01066542 0.01007843
|
|
0.01141524 0.01643252 0.01712847 0.01489973]
|
|
|
|
mean value: 0.013072490692138672
|
|
|
|
key: score_time
|
|
value: [0.04125261 0.01972508 0.0186882 0.01966286 0.01849461 0.01937628
|
|
0.02155757 0.03381705 0.02931428 0.01900506]
|
|
|
|
mean value: 0.024089360237121583
|
|
|
|
key: test_mcc
|
|
value: [0.57894737 0.34317639 0.46046963 0.31755367 0.42163702 0.42105263
|
|
0.4164324 0.25571663 0.43985776 0.3598862 ]
|
|
|
|
mean value: 0.4014729712378804
|
|
|
|
key: train_mcc
|
|
value: [0.63662825 0.66356093 0.65899369 0.63973679 0.62745107 0.65862569
|
|
0.64487398 0.67364911 0.63317297 0.63671969]
|
|
|
|
mean value: 0.6473412168235873
|
|
|
|
key: test_accuracy
|
|
value: [0.78947368 0.67105263 0.72368421 0.65789474 0.71052632 0.71052632
|
|
0.70666667 0.62666667 0.72 0.68 ]
|
|
|
|
mean value: 0.6996491228070175
|
|
|
|
key: train_accuracy
|
|
value: [0.81764706 0.83088235 0.82794118 0.81764706 0.81323529 0.82794118
|
|
0.82085169 0.83553598 0.81497797 0.8164464 ]
|
|
|
|
mean value: 0.8223106158763065
|
|
|
|
key: test_fscore
|
|
value: [0.78947368 0.6835443 0.75294118 0.675 0.71794872 0.71052632
|
|
0.71794872 0.64102564 0.72727273 0.68421053]
|
|
|
|
mean value: 0.709989181077965
|
|
|
|
key: train_fscore
|
|
value: [0.82336182 0.83687943 0.83590463 0.82777778 0.81831187 0.83544304
|
|
0.82960894 0.84269663 0.82352941 0.82566248]
|
|
|
|
mean value: 0.8299176036267183
|
|
|
|
key: test_precision
|
|
value: [0.78947368 0.65853659 0.68085106 0.64285714 0.7 0.71052632
|
|
0.68292683 0.6097561 0.71794872 0.68421053]
|
|
|
|
mean value: 0.6877086963146559
|
|
|
|
key: train_precision
|
|
value: [0.79834254 0.80821918 0.79892761 0.78421053 0.79665738 0.80053908
|
|
0.792 0.80862534 0.78609626 0.78514589]
|
|
|
|
mean value: 0.7958763807154895
|
|
|
|
key: test_recall
|
|
value: [0.78947368 0.71052632 0.84210526 0.71052632 0.73684211 0.71052632
|
|
0.75675676 0.67567568 0.73684211 0.68421053]
|
|
|
|
mean value: 0.735348506401138
|
|
|
|
key: train_recall
|
|
value: [0.85 0.86764706 0.87647059 0.87647059 0.84117647 0.87352941
|
|
0.87096774 0.8797654 0.86470588 0.87058824]
|
|
|
|
mean value: 0.867132137312403
|
|
|
|
key: test_roc_auc
|
|
value: [0.78947368 0.67105263 0.72368421 0.65789474 0.71052632 0.71052632
|
|
0.70732575 0.62731152 0.7197724 0.6799431 ]
|
|
|
|
mean value: 0.69975106685633
|
|
|
|
key: train_roc_auc
|
|
value: [0.81764706 0.83088235 0.82794118 0.81764706 0.81323529 0.82794118
|
|
0.82077799 0.83547093 0.81505089 0.81652579]
|
|
|
|
mean value: 0.8223119717095049
|
|
|
|
key: test_jcc
|
|
value: [0.65217391 0.51923077 0.60377358 0.50943396 0.56 0.55102041
|
|
0.56 0.47169811 0.57142857 0.52 ]
|
|
|
|
mean value: 0.5518759322243443
|
|
|
|
key: train_jcc
|
|
value: [0.69975787 0.7195122 0.71807229 0.70616114 0.69249395 0.7173913
|
|
0.70883055 0.72815534 0.7 0.70308789]
|
|
|
|
mean value: 0.7093462516765379
|
|
|
|
MCC on Blind test: 0.22
|
|
|
|
Accuracy on Blind test: 0.63
|
|
|
|
Model_name: SVM
|
|
Model func: SVC(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SVC(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.03784871 0.04236007 0.04361248 0.04558492 0.04659319 0.04564095
|
|
0.04482865 0.04495811 0.05761099 0.04491663]
|
|
|
|
mean value: 0.04539546966552734
|
|
|
|
key: score_time
|
|
value: [0.01578951 0.01839828 0.01736403 0.01876616 0.01869655 0.01806808
|
|
0.01781964 0.01978326 0.02513433 0.01615095]
|
|
|
|
mean value: 0.01859707832336426
|
|
|
|
key: test_mcc
|
|
value: [0.60547285 0.55747847 0.58630197 0.67716122 0.58630197 0.52631579
|
|
0.65134805 0.3884542 0.76031294 0.76214986]
|
|
|
|
mean value: 0.6101297317693735
|
|
|
|
key: train_mcc
|
|
value: [0.66766961 0.72809112 0.70489516 0.69445524 0.67848055 0.70864631
|
|
0.68801723 0.68305265 0.67096452 0.68651557]
|
|
|
|
mean value: 0.6910787978687238
|
|
|
|
key: test_accuracy
|
|
value: [0.80263158 0.77631579 0.78947368 0.82894737 0.78947368 0.76315789
|
|
0.81333333 0.69333333 0.88 0.88 ]
|
|
|
|
mean value: 0.8016666666666666
|
|
|
|
key: train_accuracy
|
|
value: [0.83235294 0.86176471 0.85 0.84558824 0.83676471 0.85294118
|
|
0.84140969 0.83994126 0.83406755 0.83994126]
|
|
|
|
mean value: 0.8434771529757278
|
|
|
|
key: test_fscore
|
|
value: [0.80519481 0.79012346 0.80487805 0.84705882 0.80487805 0.76315789
|
|
0.83333333 0.7012987 0.88 0.88607595]
|
|
|
|
mean value: 0.8115999061811281
|
|
|
|
key: train_fscore
|
|
value: [0.83988764 0.86908078 0.85833333 0.85273492 0.84604716 0.85915493
|
|
0.85082873 0.84755245 0.84106892 0.85006878]
|
|
|
|
mean value: 0.8514757632535865
|
|
|
|
key: test_precision
|
|
value: [0.79487179 0.74418605 0.75 0.76595745 0.75 0.76315789
|
|
0.74468085 0.675 0.89189189 0.85365854]
|
|
|
|
mean value: 0.7733404462469863
|
|
|
|
key: train_precision
|
|
value: [0.80376344 0.82539683 0.81315789 0.8150134 0.80052493 0.82432432
|
|
0.80417755 0.81016043 0.80592992 0.79844961]
|
|
|
|
mean value: 0.8100898329567106
|
|
|
|
key: test_recall
|
|
value: [0.81578947 0.84210526 0.86842105 0.94736842 0.86842105 0.76315789
|
|
0.94594595 0.72972973 0.86842105 0.92105263]
|
|
|
|
mean value: 0.8570412517780939
|
|
|
|
key: train_recall
|
|
value: [0.87941176 0.91764706 0.90882353 0.89411765 0.89705882 0.89705882
|
|
0.90322581 0.88856305 0.87941176 0.90882353]
|
|
|
|
mean value: 0.8974141797481456
|
|
|
|
key: test_roc_auc
|
|
value: [0.80263158 0.77631579 0.78947368 0.82894737 0.78947368 0.76315789
|
|
0.81507824 0.69381223 0.88015647 0.87944523]
|
|
|
|
mean value: 0.8018492176386913
|
|
|
|
key: train_roc_auc
|
|
value: [0.83235294 0.86176471 0.85 0.84558824 0.83676471 0.85294118
|
|
0.84131879 0.83986976 0.83413403 0.84004226]
|
|
|
|
mean value: 0.8434776608590651
|
|
|
|
key: test_jcc
|
|
value: [0.67391304 0.65306122 0.67346939 0.73469388 0.67346939 0.61702128
|
|
0.71428571 0.54 0.78571429 0.79545455]
|
|
|
|
mean value: 0.6861082743079572
|
|
|
|
key: train_jcc
|
|
value: [0.72397094 0.76847291 0.75182482 0.74327628 0.73317308 0.75308642
|
|
0.74038462 0.73543689 0.72572816 0.73923445]
|
|
|
|
mean value: 0.7414588562215931
|
|
|
|
MCC on Blind test: 0.29
|
|
|
|
Accuracy on Blind test: 0.65
|
|
|
|
Model_name: MLP
|
|
Model func: MLPClassifier(max_iter=500, random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MLPClassifier(max_iter=500, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [3.14132094 3.68036246 4.57753086 2.969275 4.33971977 6.14362621
|
|
5.03607988 4.16410136 6.13668132 8.23990679]
|
|
|
|
mean value: 4.842860460281372
|
|
|
|
key: score_time
|
|
value: [0.01293349 0.01284885 0.02021718 0.01773405 0.0181756 0.01277113
|
|
0.02675509 0.01705933 0.02391124 0.02288866]
|
|
|
|
mean value: 0.018529462814331054
|
|
|
|
key: test_mcc
|
|
value: [0.63510735 0.65812266 0.63510735 0.69989647 0.66366484 0.52704628
|
|
0.49858013 0.56442848 0.6022822 0.65134805]
|
|
|
|
mean value: 0.6135583806337428
|
|
|
|
key: train_mcc
|
|
value: [0.91778998 0.9353103 0.93020068 0.85628096 0.88878904 0.92234924
|
|
0.89925384 0.90657872 0.93538899 0.96502282]
|
|
|
|
mean value: 0.9156964577735124
|
|
|
|
key: test_accuracy
|
|
value: [0.81578947 0.82894737 0.81578947 0.84210526 0.82894737 0.76315789
|
|
0.74666667 0.77333333 0.8 0.81333333]
|
|
|
|
mean value: 0.8028070175438596
|
|
|
|
key: train_accuracy
|
|
value: [0.95882353 0.96764706 0.96470588 0.92647059 0.94411765 0.96029412
|
|
0.94860499 0.95301028 0.96769457 0.98237885]
|
|
|
|
mean value: 0.9573747516627796
|
|
|
|
key: test_fscore
|
|
value: [0.80555556 0.82666667 0.825 0.85714286 0.81690141 0.76923077
|
|
0.75949367 0.79518072 0.79452055 0.78787879]
|
|
|
|
mean value: 0.8037570986648188
|
|
|
|
key: train_fscore
|
|
value: [0.95845697 0.96774194 0.96541787 0.92957746 0.94508671 0.96148359
|
|
0.95035461 0.95389049 0.96764706 0.98214286]
|
|
|
|
mean value: 0.9581799556877332
|
|
|
|
key: test_precision
|
|
value: [0.85294118 0.83783784 0.78571429 0.7826087 0.87878788 0.75
|
|
0.71428571 0.7173913 0.82857143 0.92857143]
|
|
|
|
mean value: 0.8076709750239162
|
|
|
|
key: train_precision
|
|
value: [0.96706587 0.96491228 0.94632768 0.89189189 0.92897727 0.93351801
|
|
0.92032967 0.93767705 0.96764706 0.9939759 ]
|
|
|
|
mean value: 0.9452322689332397
|
|
|
|
key: test_recall
|
|
value: [0.76315789 0.81578947 0.86842105 0.94736842 0.76315789 0.78947368
|
|
0.81081081 0.89189189 0.76315789 0.68421053]
|
|
|
|
mean value: 0.8097439544807966
|
|
|
|
key: train_recall
|
|
value: [0.95 0.97058824 0.98529412 0.97058824 0.96176471 0.99117647
|
|
0.98240469 0.97067449 0.96764706 0.97058824]
|
|
|
|
mean value: 0.972072623770916
|
|
|
|
key: test_roc_auc
|
|
value: [0.81578947 0.82894737 0.81578947 0.84210526 0.82894737 0.76315789
|
|
0.74751067 0.77489331 0.80049787 0.81507824]
|
|
|
|
mean value: 0.803271692745377
|
|
|
|
key: train_roc_auc
|
|
value: [0.95882353 0.96764706 0.96470588 0.92647059 0.94411765 0.96029412
|
|
0.94855529 0.9529843 0.9676945 0.98236157]
|
|
|
|
mean value: 0.9573654476453338
|
|
|
|
key: test_jcc
|
|
value: [0.6744186 0.70454545 0.70212766 0.75 0.69047619 0.625
|
|
0.6122449 0.66 0.65909091 0.65 ]
|
|
|
|
mean value: 0.6727903716297369
|
|
|
|
key: train_jcc
|
|
value: [0.92022792 0.9375 0.93314763 0.86842105 0.89589041 0.92582418
|
|
0.90540541 0.91184573 0.93732194 0.96491228]
|
|
|
|
mean value: 0.9200496545411202
|
|
|
|
MCC on Blind test: 0.42
|
|
|
|
Accuracy on Blind test: 0.71
|
|
|
|
Model_name: Decision Tree
|
|
Model func: DecisionTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', DecisionTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.06233263 0.04449487 0.04643154 0.0430367 0.04746032 0.04887962
|
|
0.04782367 0.04799128 0.04560232 0.04688907]
|
|
|
|
mean value: 0.04809420108795166
|
|
|
|
key: score_time
|
|
value: [0.01078868 0.01080346 0.0107255 0.01083732 0.01073909 0.01087332
|
|
0.01463485 0.01082444 0.01063585 0.01067257]
|
|
|
|
mean value: 0.011153507232666015
|
|
|
|
key: test_mcc
|
|
value: [0.60547285 0.63510735 0.84327404 0.68516016 0.8160721 0.68516016
|
|
0.74339333 0.77409621 0.82825406 0.70676174]
|
|
|
|
mean value: 0.7322751995386018
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.80263158 0.81578947 0.92105263 0.84210526 0.90789474 0.84210526
|
|
0.86666667 0.88 0.90666667 0.85333333]
|
|
|
|
mean value: 0.8638245614035087
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.8 0.80555556 0.92307692 0.83783784 0.90666667 0.84615385
|
|
0.875 0.88888889 0.89855072 0.85714286]
|
|
|
|
mean value: 0.8638873299960257
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.81081081 0.85294118 0.9 0.86111111 0.91891892 0.825
|
|
0.81395349 0.81818182 1. 0.84615385]
|
|
|
|
mean value: 0.8647071170019187
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.78947368 0.76315789 0.94736842 0.81578947 0.89473684 0.86842105
|
|
0.94594595 0.97297297 0.81578947 0.86842105]
|
|
|
|
mean value: 0.8682076813655761
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.80263158 0.81578947 0.92105263 0.84210526 0.90789474 0.84210526
|
|
0.86770982 0.88122333 0.90789474 0.85312945]
|
|
|
|
mean value: 0.8641536273115221
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.66666667 0.6744186 0.85714286 0.72093023 0.82926829 0.73333333
|
|
0.77777778 0.8 0.81578947 0.75 ]
|
|
|
|
mean value: 0.7625327238497075
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.57
|
|
|
|
Accuracy on Blind test: 0.79
|
|
|
|
Model_name: Extra Trees
|
|
Model func: ExtraTreesClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreesClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.19184184 0.18950748 0.19028759 0.18933821 0.18971443 0.18956971
|
|
0.18968678 0.18999362 0.19175577 0.1912868 ]
|
|
|
|
mean value: 0.1902982234954834
|
|
|
|
key: score_time
|
|
value: [0.02159262 0.02172399 0.02176332 0.02140832 0.02173519 0.02172732
|
|
0.02179909 0.02168083 0.02189255 0.02173114]
|
|
|
|
mean value: 0.021705436706542968
|
|
|
|
key: test_mcc
|
|
value: [0.60547285 0.52631579 0.52704628 0.60715823 0.63245553 0.52704628
|
|
0.63072008 0.5879298 0.65362731 0.68113896]
|
|
|
|
mean value: 0.5978911102482912
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.80263158 0.76315789 0.76315789 0.80263158 0.81578947 0.76315789
|
|
0.81333333 0.78666667 0.82666667 0.84 ]
|
|
|
|
mean value: 0.797719298245614
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.8 0.76315789 0.76923077 0.81012658 0.81081081 0.75675676
|
|
0.82051282 0.80487805 0.82666667 0.83783784]
|
|
|
|
mean value: 0.7999978187611473
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.81081081 0.76315789 0.75 0.7804878 0.83333333 0.77777778
|
|
0.7804878 0.73333333 0.83783784 0.86111111]
|
|
|
|
mean value: 0.7928337708697144
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.78947368 0.76315789 0.78947368 0.84210526 0.78947368 0.73684211
|
|
0.86486486 0.89189189 0.81578947 0.81578947]
|
|
|
|
mean value: 0.8098862019914651
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.80263158 0.76315789 0.76315789 0.80263158 0.81578947 0.76315789
|
|
0.81401138 0.78805121 0.82681366 0.84032717]
|
|
|
|
mean value: 0.797972972972973
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.66666667 0.61702128 0.625 0.68085106 0.68181818 0.60869565
|
|
0.69565217 0.67346939 0.70454545 0.72093023]
|
|
|
|
mean value: 0.6674650089856033
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.39
|
|
|
|
Accuracy on Blind test: 0.71
|
|
|
|
Model_name: Extra Tree
|
|
Model func: ExtraTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01407838 0.01377559 0.01378489 0.01371646 0.01380324 0.01364303
|
|
0.01402044 0.01380372 0.0141356 0.01442766]
|
|
|
|
mean value: 0.013918900489807129
|
|
|
|
key: score_time
|
|
value: [0.01072931 0.0106051 0.01065826 0.01069689 0.01065588 0.01065207
|
|
0.01055241 0.01065183 0.01062202 0.01090717]
|
|
|
|
mean value: 0.010673093795776366
|
|
|
|
key: test_mcc
|
|
value: [0.48454371 0.4234049 0.23758365 0.43070552 0.37310125 0.37047929
|
|
0.48593799 0.2530298 0.30928855 0.43985776]
|
|
|
|
mean value: 0.3807932438593865
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.73684211 0.71052632 0.61842105 0.71052632 0.68421053 0.68421053
|
|
0.73333333 0.62666667 0.65333333 0.72 ]
|
|
|
|
mean value: 0.6878070175438596
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.76190476 0.69444444 0.60273973 0.73809524 0.70731707 0.7
|
|
0.76190476 0.61111111 0.63888889 0.72727273]
|
|
|
|
mean value: 0.6943678732820062
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.69565217 0.73529412 0.62857143 0.67391304 0.65909091 0.66666667
|
|
0.68085106 0.62857143 0.67647059 0.71794872]
|
|
|
|
mean value: 0.6763030137952595
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.84210526 0.65789474 0.57894737 0.81578947 0.76315789 0.73684211
|
|
0.86486486 0.59459459 0.60526316 0.73684211]
|
|
|
|
mean value: 0.7196301564722617
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.73684211 0.71052632 0.61842105 0.71052632 0.68421053 0.68421053
|
|
0.73506401 0.62624467 0.65398293 0.7197724 ]
|
|
|
|
mean value: 0.6879800853485064
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.61538462 0.53191489 0.43137255 0.58490566 0.54716981 0.53846154
|
|
0.61538462 0.44 0.46938776 0.57142857]
|
|
|
|
mean value: 0.5345410010096123
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.31
|
|
|
|
Accuracy on Blind test: 0.66
|
|
|
|
Model_name: Random Forest
|
|
Model func: RandomForestClassifier(n_estimators=1000, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(n_estimators=1000, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [2.87611651 2.86236358 2.88363719 3.2326839 2.5321877 4.09309006
|
|
2.40566969 2.41896081 2.89647675 6.06011367]
|
|
|
|
mean value: 3.226129984855652
|
|
|
|
key: score_time
|
|
value: [0.11310387 0.11287737 0.11273408 0.12602067 0.11169124 0.10222006
|
|
0.09603906 0.09683657 0.22334385 0.13225412]
|
|
|
|
mean value: 0.12271208763122558
|
|
|
|
key: test_mcc
|
|
value: [0.78947368 0.71077247 0.8468098 0.86872191 0.84210526 0.71077247
|
|
0.79731451 0.85123569 0.84128135 0.86699858]
|
|
|
|
mean value: 0.8125485729466699
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.89473684 0.85526316 0.92105263 0.93421053 0.92105263 0.85526316
|
|
0.89333333 0.92 0.92 0.93333333]
|
|
|
|
mean value: 0.9048245614035088
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.89473684 0.85714286 0.925 0.93506494 0.92105263 0.85714286
|
|
0.9 0.925 0.91891892 0.93333333]
|
|
|
|
mean value: 0.9067392375287112
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.89473684 0.84615385 0.88095238 0.92307692 0.92105263 0.84615385
|
|
0.8372093 0.86046512 0.94444444 0.94594595]
|
|
|
|
mean value: 0.8900191279016249
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.89473684 0.86842105 0.97368421 0.94736842 0.92105263 0.86842105
|
|
0.97297297 1. 0.89473684 0.92105263]
|
|
|
|
mean value: 0.92624466571835
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.89473684 0.85526316 0.92105263 0.93421053 0.92105263 0.85526316
|
|
0.89438122 0.92105263 0.92034139 0.93349929]
|
|
|
|
mean value: 0.9050853485064011
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.80952381 0.75 0.86046512 0.87804878 0.85365854 0.75
|
|
0.81818182 0.86046512 0.85 0.875 ]
|
|
|
|
mean value: 0.8305343177336938
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.62
|
|
|
|
Accuracy on Blind test: 0.82
|
|
|
|
Model_name: Random Forest2
|
|
Model func: RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000...05', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10,
|
|
oob_score=True, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [2.12780714 2.03359413 2.09809113 2.09191608 1.69352627 1.07551599
|
|
1.79946089 1.85238481 2.02269006 2.17245793]
|
|
|
|
mean value: 1.896744441986084
|
|
|
|
key: score_time
|
|
value: [0.25722885 0.1872704 0.19785881 0.17318702 0.1645987 0.21186113
|
|
0.23031354 0.23232102 0.3451438 0.18719149]
|
|
|
|
mean value: 0.21869747638702391
|
|
|
|
key: test_mcc
|
|
value: [0.78947368 0.76342228 0.87114007 0.86872191 0.84327404 0.73786479
|
|
0.82093797 0.87466428 0.8161102 0.81352334]
|
|
|
|
mean value: 0.819913255997762
|
|
|
|
key: train_mcc
|
|
value: [0.92695584 0.9207834 0.93856417 0.92392912 0.9207834 0.9384342
|
|
0.92118178 0.93248745 0.92118921 0.91267109]
|
|
|
|
mean value: 0.9256979681455813
|
|
|
|
key: test_accuracy
|
|
value: [0.89473684 0.88157895 0.93421053 0.93421053 0.92105263 0.86842105
|
|
0.90666667 0.93333333 0.90666667 0.90666667]
|
|
|
|
mean value: 0.9087543859649123
|
|
|
|
key: train_accuracy
|
|
value: [0.96323529 0.96029412 0.96911765 0.96176471 0.96029412 0.96911765
|
|
0.96035242 0.96622614 0.96035242 0.95594714]
|
|
|
|
mean value: 0.9626701649822925
|
|
|
|
key: test_fscore
|
|
value: [0.89473684 0.88311688 0.93670886 0.93506494 0.91891892 0.87179487
|
|
0.91139241 0.93670886 0.90410959 0.90909091]
|
|
|
|
mean value: 0.9101643075715156
|
|
|
|
key: train_fscore
|
|
value: [0.96382055 0.96069869 0.96952104 0.96231884 0.96069869 0.96943231
|
|
0.96103896 0.96642336 0.96092619 0.95677233]
|
|
|
|
mean value: 0.9631650976742234
|
|
|
|
key: test_precision
|
|
value: [0.89473684 0.87179487 0.90243902 0.92307692 0.94444444 0.85
|
|
0.85714286 0.88095238 0.94285714 0.8974359 ]
|
|
|
|
mean value: 0.8964880384200025
|
|
|
|
key: train_precision
|
|
value: [0.94871795 0.95100865 0.95702006 0.94857143 0.95100865 0.95965418
|
|
0.94602273 0.9622093 0.94586895 0.93785311]
|
|
|
|
mean value: 0.9507934987148489
|
|
|
|
key: test_recall
|
|
value: [0.89473684 0.89473684 0.97368421 0.94736842 0.89473684 0.89473684
|
|
0.97297297 1. 0.86842105 0.92105263]
|
|
|
|
mean value: 0.92624466571835
|
|
|
|
key: train_recall
|
|
value: [0.97941176 0.97058824 0.98235294 0.97647059 0.97058824 0.97941176
|
|
0.97653959 0.97067449 0.97647059 0.97647059]
|
|
|
|
mean value: 0.9758978782128687
|
|
|
|
key: test_roc_auc
|
|
value: [0.89473684 0.88157895 0.93421053 0.93421053 0.92105263 0.86842105
|
|
0.90753912 0.93421053 0.9071835 0.90647226]
|
|
|
|
mean value: 0.9089615931721196
|
|
|
|
key: train_roc_auc
|
|
value: [0.96323529 0.96029412 0.96911765 0.96176471 0.96029412 0.96911765
|
|
0.96032862 0.9662196 0.96037606 0.95597723]
|
|
|
|
mean value: 0.9626725030188029
|
|
|
|
key: test_jcc
|
|
value: [0.80952381 0.79069767 0.88095238 0.87804878 0.85 0.77272727
|
|
0.8372093 0.88095238 0.825 0.83333333]
|
|
|
|
mean value: 0.8358444934721169
|
|
|
|
key: train_jcc
|
|
value: [0.9301676 0.92436975 0.94084507 0.9273743 0.92436975 0.94067797
|
|
0.925 0.93502825 0.92479109 0.91712707]
|
|
|
|
mean value: 0.928975083852564
|
|
|
|
MCC on Blind test: 0.69
|
|
|
|
Accuracy on Blind test: 0.85
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.02901649 0.01712418 0.01678109 0.01688004 0.01723647 0.01719046
|
|
0.01725125 0.0176394 0.02488256 0.01711869]
|
|
|
|
mean value: 0.019112062454223634
|
|
|
|
key: score_time
|
|
value: [0.01377749 0.01419163 0.01411033 0.0138483 0.01393485 0.01353526
|
|
0.01349998 0.01357412 0.01382494 0.01391721]
|
|
|
|
mean value: 0.0138214111328125
|
|
|
|
key: test_mcc
|
|
value: [0.44876913 0.42163702 0.46046963 0.50870557 0.34806171 0.60547285
|
|
0.33740121 0.39112751 0.38647697 0.57437737]
|
|
|
|
mean value: 0.4482498964812848
|
|
|
|
key: train_mcc
|
|
value: [0.54391345 0.52757012 0.5421152 0.51043554 0.51499128 0.50786319
|
|
0.5198183 0.50289058 0.52633593 0.48814425]
|
|
|
|
mean value: 0.5184077820295827
|
|
|
|
key: test_accuracy
|
|
value: [0.72368421 0.71052632 0.72368421 0.75 0.67105263 0.80263158
|
|
0.66666667 0.69333333 0.69333333 0.78666667]
|
|
|
|
mean value: 0.7221578947368421
|
|
|
|
key: train_accuracy
|
|
value: [0.76911765 0.76176471 0.77058824 0.75441176 0.75588235 0.75294118
|
|
0.75917768 0.75036711 0.76211454 0.74302496]
|
|
|
|
mean value: 0.7579390170164982
|
|
|
|
key: test_fscore
|
|
value: [0.73417722 0.71794872 0.75294118 0.77108434 0.69879518 0.8
|
|
0.6835443 0.70886076 0.7012987 0.78378378]
|
|
|
|
mean value: 0.7352434176055093
|
|
|
|
key: train_fscore
|
|
value: [0.78463649 0.77562327 0.77714286 0.76379066 0.76880223 0.76338028
|
|
0.76836158 0.76190476 0.77183099 0.75386779]
|
|
|
|
mean value: 0.7689340910647905
|
|
|
|
key: test_precision
|
|
value: [0.70731707 0.7 0.68085106 0.71111111 0.64444444 0.81081081
|
|
0.64285714 0.66666667 0.69230769 0.80555556]
|
|
|
|
mean value: 0.7061921560753943
|
|
|
|
key: train_precision
|
|
value: [0.73521851 0.73298429 0.75555556 0.73569482 0.73015873 0.73243243
|
|
0.74114441 0.72922252 0.74054054 0.72237197]
|
|
|
|
mean value: 0.7355323785697852
|
|
|
|
key: test_recall
|
|
value: [0.76315789 0.73684211 0.84210526 0.84210526 0.76315789 0.78947368
|
|
0.72972973 0.75675676 0.71052632 0.76315789]
|
|
|
|
mean value: 0.769701280227596
|
|
|
|
key: train_recall
|
|
value: [0.84117647 0.82352941 0.8 0.79411765 0.81176471 0.79705882
|
|
0.79765396 0.79765396 0.80588235 0.78823529]
|
|
|
|
mean value: 0.8057072623770916
|
|
|
|
key: test_roc_auc
|
|
value: [0.72368421 0.71052632 0.72368421 0.75 0.67105263 0.80263158
|
|
0.66749644 0.69416785 0.693101 0.78698435]
|
|
|
|
mean value: 0.7223328591749645
|
|
|
|
key: train_roc_auc
|
|
value: [0.76911765 0.76176471 0.77058824 0.75441176 0.75588235 0.75294118
|
|
0.7591211 0.75029757 0.76217871 0.74309125]
|
|
|
|
mean value: 0.7579394514404002
|
|
|
|
key: test_jcc
|
|
value: [0.58 0.56 0.60377358 0.62745098 0.53703704 0.66666667
|
|
0.51923077 0.54901961 0.54 0.64444444]
|
|
|
|
mean value: 0.5827623090519872
|
|
|
|
key: train_jcc
|
|
value: [0.64559819 0.63348416 0.63551402 0.61784897 0.62443439 0.61731207
|
|
0.62385321 0.61538462 0.62844037 0.60496614]
|
|
|
|
mean value: 0.6246836141324488
|
|
|
|
MCC on Blind test: 0.4
|
|
|
|
Accuracy on Blind test: 0.71
|
|
|
|
Model_name: XGBoost
|
|
Model func: XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000...
|
|
interaction_constraints=None, learning_rate=None,
|
|
max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan,
|
|
monotone_constraints=None, n_estimators=100,
|
|
n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None,
|
|
reg_lambda=None, scale_pos_weight=None,
|
|
subsample=None, tree_method=None,
|
|
use_label_encoder=False,
|
|
validate_parameters=None, verbosity=0))])
|
|
|
|
key: fit_time
|
|
value: [7.89362502 7.2407167 7.60908389 3.2943325 2.58840609 2.48277783
|
|
2.51751781 2.52015495 2.59035778 2.5763154 ]
|
|
|
|
mean value: 4.131328797340393
|
|
|
|
key: score_time
|
|
value: [0.01742148 0.02717733 0.02067304 0.01291347 0.01362824 0.01319551
|
|
0.01345348 0.01328921 0.01412582 0.0138061 ]
|
|
|
|
mean value: 0.01596837043762207
|
|
|
|
key: test_mcc
|
|
value: [0.73786479 0.76342228 0.8183437 0.86872191 0.89473684 0.73684211
|
|
0.73786392 0.87466428 0.86956721 0.89466215]
|
|
|
|
mean value: 0.8196689174598393
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.86842105 0.88157895 0.90789474 0.93421053 0.94736842 0.86842105
|
|
0.86666667 0.93333333 0.93333333 0.94666667]
|
|
|
|
mean value: 0.9087894736842106
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.86486486 0.88 0.91139241 0.93506494 0.94736842 0.86842105
|
|
0.87179487 0.93670886 0.93150685 0.94594595]
|
|
|
|
mean value: 0.9093068206492682
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.88888889 0.89189189 0.87804878 0.92307692 0.94736842 0.86842105
|
|
0.82926829 0.88095238 0.97142857 0.97222222]
|
|
|
|
mean value: 0.9051567425315821
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.84210526 0.86842105 0.94736842 0.94736842 0.94736842 0.86842105
|
|
0.91891892 1. 0.89473684 0.92105263]
|
|
|
|
mean value: 0.9155761024182076
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.86842105 0.88157895 0.90789474 0.93421053 0.94736842 0.86842105
|
|
0.8673542 0.93421053 0.93385491 0.9470128 ]
|
|
|
|
mean value: 0.9090327169274538
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.76190476 0.78571429 0.8372093 0.87804878 0.9 0.76744186
|
|
0.77272727 0.88095238 0.87179487 0.8974359 ]
|
|
|
|
mean value: 0.8353229413807973
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.7
|
|
|
|
Accuracy on Blind test: 0.85
|
|
|
|
Model_name: LDA
|
|
Model func: LinearDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LinearDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.06677675 0.07216001 0.09838676 0.06886077 0.10840917 0.07699609
|
|
0.07368469 0.06065607 0.10425782 0.07089186]
|
|
|
|
mean value: 0.08010799884796142
|
|
|
|
key: score_time
|
|
value: [0.03830051 0.02034378 0.02046394 0.01280332 0.01740789 0.01258874
|
|
0.01256037 0.02028108 0.0206306 0.01292753]
|
|
|
|
mean value: 0.01883077621459961
|
|
|
|
key: test_mcc
|
|
value: [0.55747847 0.68421053 0.47434165 0.66934944 0.73786479 0.58218174
|
|
0.63555097 0.68450529 0.68816575 0.79731451]
|
|
|
|
mean value: 0.6510963135409902
|
|
|
|
key: train_mcc
|
|
value: [0.81346943 0.82215795 0.81346943 0.81008941 0.81346943 0.81266504
|
|
0.81477817 0.79551607 0.78811496 0.79364674]
|
|
|
|
mean value: 0.8077376632042856
|
|
|
|
key: test_accuracy
|
|
value: [0.77631579 0.84210526 0.73684211 0.82894737 0.86842105 0.78947368
|
|
0.81333333 0.82666667 0.84 0.89333333]
|
|
|
|
mean value: 0.8215438596491228
|
|
|
|
key: train_accuracy
|
|
value: [0.90588235 0.91029412 0.90588235 0.90441176 0.90588235 0.90588235
|
|
0.90602056 0.89720999 0.8928047 0.89574156]
|
|
|
|
mean value: 0.9030012092942904
|
|
|
|
key: test_fscore
|
|
value: [0.79012346 0.84210526 0.74358974 0.84337349 0.86486486 0.8
|
|
0.825 0.84705882 0.85365854 0.88571429]
|
|
|
|
mean value: 0.8295488468207594
|
|
|
|
key: train_fscore
|
|
value: [0.90883191 0.91298146 0.90883191 0.90701001 0.90883191 0.90804598
|
|
0.90985915 0.9 0.89674682 0.89929078]
|
|
|
|
mean value: 0.9060429925487885
|
|
|
|
key: test_precision
|
|
value: [0.74418605 0.84210526 0.725 0.77777778 0.88888889 0.76190476
|
|
0.76744186 0.75 0.79545455 0.96875 ]
|
|
|
|
mean value: 0.8021509144160612
|
|
|
|
key: train_precision
|
|
value: [0.88121547 0.88642659 0.88121547 0.88300836 0.88121547 0.88764045
|
|
0.87533875 0.87743733 0.86376022 0.86849315]
|
|
|
|
mean value: 0.8785751255583135
|
|
|
|
key: test_recall
|
|
value: [0.84210526 0.84210526 0.76315789 0.92105263 0.84210526 0.84210526
|
|
0.89189189 0.97297297 0.92105263 0.81578947]
|
|
|
|
mean value: 0.8654338549075391
|
|
|
|
key: train_recall
|
|
value: [0.93823529 0.94117647 0.93823529 0.93235294 0.93823529 0.92941176
|
|
0.94721408 0.92375367 0.93235294 0.93235294]
|
|
|
|
mean value: 0.9353320683111954
|
|
|
|
key: test_roc_auc
|
|
value: [0.77631579 0.84210526 0.73684211 0.82894737 0.86842105 0.78947368
|
|
0.814367 0.82859175 0.83890469 0.89438122]
|
|
|
|
mean value: 0.8218349928876245
|
|
|
|
key: train_roc_auc
|
|
value: [0.90588235 0.91029412 0.90588235 0.90441176 0.90588235 0.90588235
|
|
0.90595998 0.89717095 0.89286269 0.89579524]
|
|
|
|
mean value: 0.9030024150422633
|
|
|
|
key: test_jcc
|
|
value: [0.65306122 0.72727273 0.59183673 0.72916667 0.76190476 0.66666667
|
|
0.70212766 0.73469388 0.74468085 0.79487179]
|
|
|
|
mean value: 0.7106282964755609
|
|
|
|
key: train_jcc
|
|
value: [0.83289817 0.83989501 0.83289817 0.82984293 0.83289817 0.83157895
|
|
0.83462532 0.81818182 0.81282051 0.81701031]
|
|
|
|
mean value: 0.828264937267833
|
|
|
|
MCC on Blind test: 0.38
|
|
|
|
Accuracy on Blind test: 0.7
|
|
|
|
Model_name: Multinomial
|
|
Model func: MultinomialNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MultinomialNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01752853 0.01298213 0.01057148 0.01053071 0.01071167 0.0109427
|
|
0.01110673 0.01119041 0.0113492 0.01094151]
|
|
|
|
mean value: 0.011785507202148438
|
|
|
|
key: score_time
|
|
value: [0.01069331 0.01010323 0.00899935 0.00897908 0.00899887 0.00918484
|
|
0.00946784 0.00931406 0.00949073 0.0092504 ]
|
|
|
|
mean value: 0.00944817066192627
|
|
|
|
key: test_mcc
|
|
value: [0.56225353 0.4234049 0.42640143 0.56881543 0.4234049 0.61057165
|
|
0.60670651 0.36286048 0.68113896 0.65362731]
|
|
|
|
mean value: 0.5319185102217086
|
|
|
|
key: train_mcc
|
|
value: [0.53311399 0.55293751 0.5758832 0.54811747 0.55642736 0.54110997
|
|
0.54699753 0.56882747 0.53111146 0.52917163]
|
|
|
|
mean value: 0.5483697601927713
|
|
|
|
key: test_accuracy
|
|
value: [0.77631579 0.71052632 0.71052632 0.77631579 0.71052632 0.80263158
|
|
0.78666667 0.68 0.84 0.82666667]
|
|
|
|
mean value: 0.7620175438596491
|
|
|
|
key: train_accuracy
|
|
value: [0.76470588 0.775 0.78529412 0.77205882 0.77647059 0.76911765
|
|
0.77239354 0.78267254 0.76358297 0.76211454]
|
|
|
|
mean value: 0.7723410641789755
|
|
|
|
key: test_fscore
|
|
value: [0.79518072 0.725 0.73170732 0.8 0.725 0.81481481
|
|
0.81395349 0.69230769 0.83783784 0.82666667]
|
|
|
|
mean value: 0.7762468539963842
|
|
|
|
key: train_fscore
|
|
value: [0.77777778 0.78601399 0.79889807 0.7850208 0.78830084 0.78041958
|
|
0.7826087 0.79444444 0.77669903 0.7768595 ]
|
|
|
|
mean value: 0.7847042729284628
|
|
|
|
key: test_precision
|
|
value: [0.73333333 0.69047619 0.68181818 0.72340426 0.69047619 0.76744186
|
|
0.71428571 0.65853659 0.86111111 0.83783784]
|
|
|
|
mean value: 0.7358721260488679
|
|
|
|
key: train_precision
|
|
value: [0.73684211 0.74933333 0.75129534 0.74278215 0.74867725 0.744
|
|
0.75 0.75461741 0.73490814 0.73056995]
|
|
|
|
mean value: 0.7443025675209765
|
|
|
|
key: test_recall
|
|
value: [0.86842105 0.76315789 0.78947368 0.89473684 0.76315789 0.86842105
|
|
0.94594595 0.72972973 0.81578947 0.81578947]
|
|
|
|
mean value: 0.8254623044096728
|
|
|
|
key: train_recall
|
|
value: [0.82352941 0.82647059 0.85294118 0.83235294 0.83235294 0.82058824
|
|
0.81818182 0.83870968 0.82352941 0.82941176]
|
|
|
|
mean value: 0.8298067966189409
|
|
|
|
key: test_roc_auc
|
|
value: [0.77631579 0.71052632 0.71052632 0.77631579 0.71052632 0.80263158
|
|
0.78876245 0.68065434 0.84032717 0.82681366]
|
|
|
|
mean value: 0.7623399715504979
|
|
|
|
key: train_roc_auc
|
|
value: [0.76470588 0.775 0.78529412 0.77205882 0.77647059 0.76911765
|
|
0.7723262 0.78259013 0.76367086 0.76221321]
|
|
|
|
mean value: 0.7723447472830774
|
|
|
|
key: test_jcc
|
|
value: [0.66 0.56862745 0.57692308 0.66666667 0.56862745 0.6875
|
|
0.68627451 0.52941176 0.72093023 0.70454545]
|
|
|
|
mean value: 0.6369506607163926
|
|
|
|
key: train_jcc
|
|
value: [0.63636364 0.64746544 0.66513761 0.64611872 0.65057471 0.63990826
|
|
0.64285714 0.65898618 0.63492063 0.63513514]
|
|
|
|
mean value: 0.6457467467844273
|
|
|
|
MCC on Blind test: 0.33
|
|
|
|
Accuracy on Blind test: 0.67
|
|
|
|
Model_name: Passive Aggresive
|
|
Model func: PassiveAggressiveClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
PassiveAggressiveClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02964997 0.03408456 0.02785635 0.02643657 0.0395534 0.02772713
|
|
0.0276258 0.03434777 0.03367639 0.02488256]
|
|
|
|
mean value: 0.030584049224853516
|
|
|
|
key: score_time
|
|
value: [0.01199055 0.01222253 0.01222086 0.01222062 0.02310967 0.0122838
|
|
0.01220417 0.01290202 0.0122335 0.01225805]
|
|
|
|
mean value: 0.013364577293395996
|
|
|
|
key: test_mcc
|
|
value: [0.61580149 0.65465367 0.45580284 0.68732175 0.68421053 0.6599546
|
|
0.63072008 0.67690792 0.63803145 0.79648145]
|
|
|
|
mean value: 0.6499885777707087
|
|
|
|
key: train_mcc
|
|
value: [0.79095237 0.63950783 0.51534483 0.74012163 0.76090471 0.75727024
|
|
0.75624461 0.72844789 0.68424172 0.66651043]
|
|
|
|
mean value: 0.7039546254300463
|
|
|
|
key: test_accuracy
|
|
value: [0.80263158 0.81578947 0.69736842 0.82894737 0.84210526 0.82894737
|
|
0.81333333 0.81333333 0.8 0.89333333]
|
|
|
|
mean value: 0.8135789473684211
|
|
|
|
key: train_accuracy
|
|
value: [0.89117647 0.79264706 0.71764706 0.86323529 0.87794118 0.875
|
|
0.87812041 0.85756241 0.82085169 0.81938326]
|
|
|
|
mean value: 0.8393564826811782
|
|
|
|
key: test_fscore
|
|
value: [0.81927711 0.8372093 0.59649123 0.85057471 0.84210526 0.83544304
|
|
0.82051282 0.84090909 0.83146067 0.90243902]
|
|
|
|
mean value: 0.8176422262575207
|
|
|
|
key: train_fscore
|
|
value: [0.89863014 0.82741738 0.61445783 0.87516779 0.88456189 0.88308116
|
|
0.87812041 0.87014726 0.84711779 0.84169884]
|
|
|
|
mean value: 0.8420400484191279
|
|
|
|
key: test_precision
|
|
value: [0.75555556 0.75 0.89473684 0.75510204 0.84210526 0.80487805
|
|
0.7804878 0.7254902 0.7254902 0.84090909]
|
|
|
|
mean value: 0.787475503835953
|
|
|
|
key: train_precision
|
|
value: [0.84102564 0.70859539 0.96835443 0.80493827 0.83905013 0.82945736
|
|
0.87941176 0.80049261 0.73799127 0.74828375]
|
|
|
|
mean value: 0.8157600621897483
|
|
|
|
key: test_recall
|
|
value: [0.89473684 0.94736842 0.44736842 0.97368421 0.84210526 0.86842105
|
|
0.86486486 1. 0.97368421 0.97368421]
|
|
|
|
mean value: 0.8785917496443812
|
|
|
|
key: train_recall
|
|
value: [0.96470588 0.99411765 0.45 0.95882353 0.93529412 0.94411765
|
|
0.87683284 0.95307918 0.99411765 0.96176471]
|
|
|
|
mean value: 0.9032853199930999
|
|
|
|
key: test_roc_auc
|
|
value: [0.80263158 0.81578947 0.69736842 0.82894737 0.84210526 0.82894737
|
|
0.81401138 0.81578947 0.79765292 0.89224751]
|
|
|
|
mean value: 0.8135490753911807
|
|
|
|
key: train_roc_auc
|
|
value: [0.89117647 0.79264706 0.71764706 0.86323529 0.87794118 0.875
|
|
0.8781223 0.85742194 0.82110574 0.81959203]
|
|
|
|
mean value: 0.839388908055891
|
|
|
|
key: test_jcc
|
|
value: [0.69387755 0.72 0.425 0.74 0.72727273 0.7173913
|
|
0.69565217 0.7254902 0.71153846 0.82222222]
|
|
|
|
mean value: 0.697844463639312
|
|
|
|
key: train_jcc
|
|
value: [0.8159204 0.70563674 0.44347826 0.77804296 0.79301746 0.79064039
|
|
0.78272251 0.77014218 0.73478261 0.72666667]
|
|
|
|
mean value: 0.7341050180515637
|
|
|
|
MCC on Blind test: 0.33
|
|
|
|
Accuracy on Blind test: 0.68
|
|
|
|
Model_name: Stochastic GDescent
|
|
Model func: SGDClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SGDClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02871132 0.03494477 0.03013086 0.0305562 0.02615333 0.03089976
|
|
0.03173423 0.02857089 0.02462792 0.03026676]
|
|
|
|
mean value: 0.029659605026245116
|
|
|
|
key: score_time
|
|
value: [0.0121994 0.01220679 0.01224995 0.01220608 0.01222086 0.01505613
|
|
0.01224756 0.01222181 0.01276612 0.01221371]
|
|
|
|
mean value: 0.012558841705322265
|
|
|
|
key: test_mcc
|
|
value: [0.66366484 0.74620251 0.53785287 0.65875812 0.73684211 0.63510735
|
|
0.33244524 0.6289448 0.73712493 0.74989398]
|
|
|
|
mean value: 0.6426836747071496
|
|
|
|
key: train_mcc
|
|
value: [0.75946807 0.73206979 0.60041487 0.68188704 0.75527031 0.76914899
|
|
0.55892797 0.7515309 0.75373376 0.70865921]
|
|
|
|
mean value: 0.7071110924049309
|
|
|
|
key: test_accuracy
|
|
value: [0.82894737 0.86842105 0.73684211 0.80263158 0.86842105 0.81578947
|
|
0.64 0.8 0.86666667 0.86666667]
|
|
|
|
mean value: 0.8094385964912281
|
|
|
|
key: train_accuracy
|
|
value: [0.87794118 0.85441176 0.76764706 0.82794118 0.87352941 0.88088235
|
|
0.74743025 0.86930984 0.8722467 0.83700441]
|
|
|
|
mean value: 0.8408344130603783
|
|
|
|
key: test_fscore
|
|
value: [0.81690141 0.87804878 0.78723404 0.83516484 0.86842105 0.825
|
|
0.49056604 0.82352941 0.875 0.88095238]
|
|
|
|
mean value: 0.808081794974105
|
|
|
|
key: train_fscore
|
|
value: [0.87171561 0.87058824 0.81055156 0.84864166 0.88219178 0.88858322
|
|
0.67175573 0.88053691 0.88130969 0.85859873]
|
|
|
|
mean value: 0.8464473110250429
|
|
|
|
key: test_precision
|
|
value: [0.87878788 0.81818182 0.66071429 0.71698113 0.86842105 0.78571429
|
|
0.8125 0.72916667 0.83333333 0.80434783]
|
|
|
|
mean value: 0.7908148279192275
|
|
|
|
key: train_precision
|
|
value: [0.91856678 0.78352941 0.68421053 0.75750577 0.82564103 0.83462532
|
|
0.96174863 0.81188119 0.82188295 0.75730337]
|
|
|
|
mean value: 0.8156894980074346
|
|
|
|
key: test_recall
|
|
value: [0.76315789 0.94736842 0.97368421 1. 0.86842105 0.86842105
|
|
0.35135135 0.94594595 0.92105263 0.97368421]
|
|
|
|
mean value: 0.8613086770981508
|
|
|
|
key: train_recall
|
|
value: [0.82941176 0.97941176 0.99411765 0.96470588 0.94705882 0.95
|
|
0.51612903 0.96187683 0.95 0.99117647]
|
|
|
|
mean value: 0.9083888218043816
|
|
|
|
key: test_roc_auc
|
|
value: [0.82894737 0.86842105 0.73684211 0.80263158 0.86842105 0.81578947
|
|
0.63620199 0.80192034 0.86593172 0.86522048]
|
|
|
|
mean value: 0.8090327169274538
|
|
|
|
key: train_roc_auc
|
|
value: [0.87794118 0.85441176 0.76764706 0.82794118 0.87352941 0.88088235
|
|
0.7477704 0.86917371 0.8723607 0.83723046]
|
|
|
|
mean value: 0.8408888218043816
|
|
|
|
key: test_jcc
|
|
value: [0.69047619 0.7826087 0.64912281 0.71698113 0.76744186 0.70212766
|
|
0.325 0.7 0.77777778 0.78723404]
|
|
|
|
mean value: 0.6898770165591933
|
|
|
|
key: train_jcc
|
|
value: [0.77260274 0.77083333 0.68145161 0.73707865 0.78921569 0.79950495
|
|
0.50574713 0.78657074 0.78780488 0.75223214]
|
|
|
|
mean value: 0.738304186516552
|
|
|
|
MCC on Blind test: 0.24
|
|
|
|
Accuracy on Blind test: 0.64
|
|
|
|
Model_name: AdaBoost Classifier
|
|
Model func: AdaBoostClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', AdaBoostClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.25938892 0.2407124 0.24690509 0.24088907 0.24046206 0.24187708
|
|
0.23969173 0.24876094 0.2406652 0.24076819]
|
|
|
|
mean value: 0.24401206970214845
|
|
|
|
key: score_time
|
|
value: [0.01600456 0.01612997 0.01601338 0.01607347 0.01611042 0.01616073
|
|
0.016047 0.01584435 0.01720834 0.01627564]
|
|
|
|
mean value: 0.016186785697937012
|
|
|
|
key: test_mcc
|
|
value: [0.78947368 0.78947368 0.8183437 0.87114007 0.86872191 0.73684211
|
|
0.79143584 0.87466428 0.84128135 0.89466215]
|
|
|
|
mean value: 0.8276038764870668
|
|
|
|
key: train_mcc
|
|
value: [0.94707521 0.94119275 0.92650666 0.92679534 0.92947609 0.94117647
|
|
0.92370396 0.94127845 0.9443026 0.9443026 ]
|
|
|
|
mean value: 0.9365810139893734
|
|
|
|
key: test_accuracy
|
|
value: [0.89473684 0.89473684 0.90789474 0.93421053 0.93421053 0.86842105
|
|
0.89333333 0.93333333 0.92 0.94666667]
|
|
|
|
mean value: 0.9127543859649123
|
|
|
|
key: train_accuracy
|
|
value: [0.97352941 0.97058824 0.96323529 0.96323529 0.96470588 0.97058824
|
|
0.96182085 0.97063142 0.97209985 0.97209985]
|
|
|
|
mean value: 0.9682534335320031
|
|
|
|
key: test_fscore
|
|
value: [0.89473684 0.89473684 0.91139241 0.93670886 0.93333333 0.86842105
|
|
0.8974359 0.93670886 0.91891892 0.94594595]
|
|
|
|
mean value: 0.913833895905848
|
|
|
|
key: train_fscore
|
|
value: [0.97345133 0.97050147 0.96339678 0.96371553 0.96491228 0.97058824
|
|
0.96209913 0.97076023 0.97226277 0.97226277]
|
|
|
|
mean value: 0.968395053375338
|
|
|
|
key: test_precision
|
|
value: [0.89473684 0.89473684 0.87804878 0.90243902 0.94594595 0.86842105
|
|
0.85365854 0.88095238 0.94444444 0.97222222]
|
|
|
|
mean value: 0.9035606071870513
|
|
|
|
key: train_precision
|
|
value: [0.97633136 0.97337278 0.95918367 0.9512894 0.95930233 0.97058824
|
|
0.95652174 0.96793003 0.96521739 0.96521739]
|
|
|
|
mean value: 0.9644954325531186
|
|
|
|
key: test_recall
|
|
value: [0.89473684 0.89473684 0.94736842 0.97368421 0.92105263 0.86842105
|
|
0.94594595 1. 0.89473684 0.92105263]
|
|
|
|
mean value: 0.9261735419630156
|
|
|
|
key: train_recall
|
|
value: [0.97058824 0.96764706 0.96764706 0.97647059 0.97058824 0.97058824
|
|
0.96774194 0.97360704 0.97941176 0.97941176]
|
|
|
|
mean value: 0.9723701914783509
|
|
|
|
key: test_roc_auc
|
|
value: [0.89473684 0.89473684 0.90789474 0.93421053 0.93421053 0.86842105
|
|
0.8940256 0.93421053 0.92034139 0.9470128 ]
|
|
|
|
mean value: 0.9129800853485064
|
|
|
|
key: train_roc_auc
|
|
value: [0.97352941 0.97058824 0.96323529 0.96323529 0.96470588 0.97058824
|
|
0.96181214 0.97062705 0.97211057 0.97211057]
|
|
|
|
mean value: 0.9682542694497154
|
|
|
|
key: test_jcc
|
|
value: [0.80952381 0.80952381 0.8372093 0.88095238 0.875 0.76744186
|
|
0.81395349 0.88095238 0.85 0.8974359 ]
|
|
|
|
mean value: 0.8421992929551069
|
|
|
|
key: train_jcc
|
|
value: [0.94827586 0.94269341 0.92937853 0.92997199 0.93220339 0.94285714
|
|
0.92696629 0.94318182 0.94602273 0.94602273]
|
|
|
|
mean value: 0.9387573889229806
|
|
|
|
MCC on Blind test: 0.52
|
|
|
|
Accuracy on Blind test: 0.77
|
|
|
|
Model_name: Bagging Classifier
|
|
Model func: BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
BaggingClassifier(n_jobs=10, oob_score=True,
|
|
random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.1156733 0.13879728 0.13017058 0.13234949 0.12597513 0.12639976
|
|
0.13745451 0.10307813 0.10984015 0.0775187 ]
|
|
|
|
mean value: 0.11972570419311523
|
|
|
|
key: score_time
|
|
value: [0.02559948 0.02842474 0.02678871 0.03508401 0.02295709 0.03692055
|
|
0.0227046 0.03501654 0.03241634 0.02127242]
|
|
|
|
mean value: 0.028718447685241698
|
|
|
|
key: test_mcc
|
|
value: [0.73684211 0.8183437 0.8183437 0.8183437 0.89473684 0.76342228
|
|
0.74339333 0.85123569 0.86956721 0.81352334]
|
|
|
|
mean value: 0.8127751897706782
|
|
|
|
key: train_mcc
|
|
value: [0.98529838 0.97951769 0.98825239 0.98825239 0.97951769 0.98830369
|
|
0.99414347 0.98535412 0.98832054 0.98531987]
|
|
|
|
mean value: 0.9862280224473037
|
|
|
|
key: test_accuracy
|
|
value: [0.86842105 0.90789474 0.90789474 0.90789474 0.94736842 0.88157895
|
|
0.86666667 0.92 0.93333333 0.90666667]
|
|
|
|
mean value: 0.9047719298245614
|
|
|
|
key: train_accuracy
|
|
value: [0.99264706 0.98970588 0.99411765 0.99411765 0.98970588 0.99411765
|
|
0.99706314 0.99265786 0.99412628 0.99265786]
|
|
|
|
mean value: 0.9930916904206617
|
|
|
|
key: test_fscore
|
|
value: [0.86842105 0.90410959 0.91139241 0.91139241 0.94736842 0.88311688
|
|
0.875 0.925 0.93150685 0.90909091]
|
|
|
|
mean value: 0.9066398514374749
|
|
|
|
key: train_fscore
|
|
value: [0.99263623 0.98962963 0.9941349 0.99410029 0.98962963 0.99408284
|
|
0.99705882 0.99263623 0.99408284 0.99263623]
|
|
|
|
mean value: 0.9930627644856893
|
|
|
|
key: test_precision
|
|
value: [0.86842105 0.94285714 0.87804878 0.87804878 0.94736842 0.87179487
|
|
0.81395349 0.86046512 0.97142857 0.8974359 ]
|
|
|
|
mean value: 0.8929822122827467
|
|
|
|
key: train_precision
|
|
value: [0.99410029 0.99701493 0.99122807 0.99704142 0.99701493 1.
|
|
1. 0.99704142 1. 0.99410029]
|
|
|
|
mean value: 0.9967541351128895
|
|
|
|
key: test_recall
|
|
value: [0.86842105 0.86842105 0.94736842 0.94736842 0.94736842 0.89473684
|
|
0.94594595 1. 0.89473684 0.92105263]
|
|
|
|
mean value: 0.9235419630156472
|
|
|
|
key: train_recall
|
|
value: [0.99117647 0.98235294 0.99705882 0.99117647 0.98235294 0.98823529
|
|
0.9941349 0.98826979 0.98823529 0.99117647]
|
|
|
|
mean value: 0.9894169397964464
|
|
|
|
key: test_roc_auc
|
|
value: [0.86842105 0.90789474 0.90789474 0.90789474 0.94736842 0.88157895
|
|
0.86770982 0.92105263 0.93385491 0.90647226]
|
|
|
|
mean value: 0.9050142247510669
|
|
|
|
key: train_roc_auc
|
|
value: [0.99264706 0.98970588 0.99411765 0.99411765 0.98970588 0.99411765
|
|
0.99706745 0.99266431 0.99411765 0.99265568]
|
|
|
|
mean value: 0.9930916853544938
|
|
|
|
key: test_jcc
|
|
value: [0.76744186 0.825 0.8372093 0.8372093 0.9 0.79069767
|
|
0.77777778 0.86046512 0.87179487 0.83333333]
|
|
|
|
mean value: 0.8300929238719936
|
|
|
|
key: train_jcc
|
|
value: [0.98538012 0.97947214 0.98833819 0.98826979 0.97947214 0.98823529
|
|
0.9941349 0.98538012 0.98823529 0.98538012]
|
|
|
|
mean value: 0.9862298105139351
|
|
|
|
MCC on Blind test: 0.62
|
|
|
|
Accuracy on Blind test: 0.82
|
|
|
|
Model_name: Gaussian Process
|
|
Model func: GaussianProcessClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianProcessClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.35254955 0.34180379 0.35496879 0.58395147 0.41598964 0.34471917
|
|
0.41314936 0.37673903 0.64443469 0.37615204]
|
|
|
|
mean value: 0.4204457521438599
|
|
|
|
key: score_time
|
|
value: [0.03205991 0.03060985 0.03298879 0.04649806 0.03177595 0.03139973
|
|
0.03534102 0.01836991 0.03432822 0.02413678]
|
|
|
|
mean value: 0.031750822067260744
|
|
|
|
key: test_mcc
|
|
value: [0.57894737 0.48454371 0.5383819 0.47970161 0.58630197 0.50156549
|
|
0.52770861 0.41986625 0.54661936 0.60000015]
|
|
|
|
mean value: 0.5263636431601073
|
|
|
|
key: train_mcc
|
|
value: [0.93633146 0.9424982 0.95049758 0.94503779 0.93608802 0.93413645
|
|
0.93111706 0.94537466 0.93950187 0.93336249]
|
|
|
|
mean value: 0.9393945591798148
|
|
|
|
key: test_accuracy
|
|
value: [0.78947368 0.73684211 0.76315789 0.73684211 0.78947368 0.75
|
|
0.76 0.70666667 0.77333333 0.8 ]
|
|
|
|
mean value: 0.760578947368421
|
|
|
|
key: train_accuracy
|
|
value: [0.96764706 0.97058824 0.975 0.97205882 0.96764706 0.96617647
|
|
0.96475771 0.97209985 0.969163 0.96622614]
|
|
|
|
mean value: 0.9691364343094065
|
|
|
|
key: test_fscore
|
|
value: [0.78947368 0.76190476 0.78571429 0.75609756 0.80487805 0.73972603
|
|
0.775 0.725 0.77922078 0.80519481]
|
|
|
|
mean value: 0.7722209953398516
|
|
|
|
key: train_fscore
|
|
value: [0.9683908 0.9713467 0.97539797 0.97266187 0.96829971 0.96718973
|
|
0.96581197 0.97281831 0.96987088 0.96690647]
|
|
|
|
mean value: 0.9698694422382902
|
|
|
|
key: test_precision
|
|
value: [0.78947368 0.69565217 0.7173913 0.70454545 0.75 0.77142857
|
|
0.72093023 0.6744186 0.76923077 0.79487179]
|
|
|
|
mean value: 0.7387942589757288
|
|
|
|
key: train_precision
|
|
value: [0.94662921 0.94692737 0.96011396 0.95211268 0.94915254 0.93905817
|
|
0.93905817 0.94972067 0.94677871 0.94647887]
|
|
|
|
mean value: 0.9476030364933398
|
|
|
|
key: test_recall
|
|
value: [0.78947368 0.84210526 0.86842105 0.81578947 0.86842105 0.71052632
|
|
0.83783784 0.78378378 0.78947368 0.81578947]
|
|
|
|
mean value: 0.8121621621621622
|
|
|
|
key: train_recall
|
|
value: [0.99117647 0.99705882 0.99117647 0.99411765 0.98823529 0.99705882
|
|
0.9941349 0.99706745 0.99411765 0.98823529]
|
|
|
|
mean value: 0.9932378816629291
|
|
|
|
key: test_roc_auc
|
|
value: [0.78947368 0.73684211 0.76315789 0.73684211 0.78947368 0.75
|
|
0.76102418 0.70768137 0.77311522 0.79978663]
|
|
|
|
mean value: 0.7607396870554766
|
|
|
|
key: train_roc_auc
|
|
value: [0.96764706 0.97058824 0.975 0.97205882 0.96764706 0.96617647
|
|
0.96471451 0.97206314 0.96919959 0.96625841]
|
|
|
|
mean value: 0.9691353286182509
|
|
|
|
key: test_jcc
|
|
value: [0.65217391 0.61538462 0.64705882 0.60784314 0.67346939 0.58695652
|
|
0.63265306 0.56862745 0.63829787 0.67391304]
|
|
|
|
mean value: 0.6296377826730208
|
|
|
|
key: train_jcc
|
|
value: [0.93871866 0.94428969 0.9519774 0.94677871 0.93854749 0.93646409
|
|
0.9338843 0.94707521 0.94150418 0.93593315]
|
|
|
|
mean value: 0.941517287593141
|
|
|
|
MCC on Blind test: 0.32
|
|
|
|
Accuracy on Blind test: 0.67
|
|
|
|
Model_name: Gradient Boosting
|
|
Model func: GradientBoostingClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GradientBoostingClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.13097405 1.05412555 1.13440681 1.10352182 1.12288213 1.10454321
|
|
1.09815526 1.12077451 1.10359883 1.09796929]
|
|
|
|
mean value: 1.1070951461791991
|
|
|
|
key: score_time
|
|
value: [0.00992966 0.00986409 0.00992966 0.01039696 0.01056504 0.01512766
|
|
0.01047087 0.01035357 0.0112443 0.01109767]
|
|
|
|
mean value: 0.010897946357727051
|
|
|
|
key: test_mcc
|
|
value: [0.71077247 0.79056942 0.92137172 0.84327404 0.89473684 0.73786479
|
|
0.73786392 0.87466428 0.84500776 0.92302421]
|
|
|
|
mean value: 0.827914943952
|
|
|
|
key: train_mcc
|
|
value: [0.98236994 0.99118076 0.98823529 0.98825239 0.98235294 0.98236994
|
|
0.98531999 0.98535412 0.98237882 0.98825254]
|
|
|
|
mean value: 0.985606672896309
|
|
|
|
key: test_accuracy
|
|
value: [0.85526316 0.89473684 0.96052632 0.92105263 0.94736842 0.86842105
|
|
0.86666667 0.93333333 0.92 0.96 ]
|
|
|
|
mean value: 0.9127368421052632
|
|
|
|
key: train_accuracy
|
|
value: [0.99117647 0.99558824 0.99411765 0.99411765 0.99117647 0.99117647
|
|
0.99265786 0.99265786 0.99118943 0.99412628]
|
|
|
|
mean value: 0.9927984365552389
|
|
|
|
key: test_fscore
|
|
value: [0.85333333 0.89189189 0.96 0.92307692 0.94736842 0.87179487
|
|
0.87179487 0.93670886 0.91666667 0.95890411]
|
|
|
|
mean value: 0.9131539949959725
|
|
|
|
key: train_fscore
|
|
value: [0.99115044 0.99559471 0.99411765 0.9941349 0.99117647 0.99115044
|
|
0.99265786 0.99263623 0.99117647 0.99411765]
|
|
|
|
mean value: 0.9927912817110572
|
|
|
|
key: test_precision
|
|
value: [0.86486486 0.91666667 0.97297297 0.9 0.94736842 0.85
|
|
0.82926829 0.88095238 0.97058824 1. ]
|
|
|
|
mean value: 0.9132681834486561
|
|
|
|
key: train_precision
|
|
value: [0.99408284 0.9941349 0.99411765 0.99122807 0.99117647 0.99408284
|
|
0.99411765 0.99704142 0.99117647 0.99411765]
|
|
|
|
mean value: 0.99352759504808
|
|
|
|
key: test_recall
|
|
value: [0.84210526 0.86842105 0.94736842 0.94736842 0.94736842 0.89473684
|
|
0.91891892 1. 0.86842105 0.92105263]
|
|
|
|
mean value: 0.9155761024182076
|
|
|
|
key: train_recall
|
|
value: [0.98823529 0.99705882 0.99411765 0.99705882 0.99117647 0.98823529
|
|
0.99120235 0.98826979 0.99117647 0.99411765]
|
|
|
|
mean value: 0.9920648611350699
|
|
|
|
key: test_roc_auc
|
|
value: [0.85526316 0.89473684 0.96052632 0.92105263 0.94736842 0.86842105
|
|
0.8673542 0.93421053 0.92069701 0.96052632]
|
|
|
|
mean value: 0.9130156472261736
|
|
|
|
key: train_roc_auc
|
|
value: [0.99117647 0.99558824 0.99411765 0.99411765 0.99117647 0.99117647
|
|
0.99266 0.99266431 0.99118941 0.99412627]
|
|
|
|
mean value: 0.9927992927376229
|
|
|
|
key: test_jcc
|
|
value: [0.74418605 0.80487805 0.92307692 0.85714286 0.9 0.77272727
|
|
0.77272727 0.88095238 0.84615385 0.92105263]
|
|
|
|
mean value: 0.8422897279651615
|
|
|
|
key: train_jcc
|
|
value: [0.98245614 0.99122807 0.98830409 0.98833819 0.98250729 0.98245614
|
|
0.98542274 0.98538012 0.98250729 0.98830409]
|
|
|
|
mean value: 0.9856904165174841
|
|
|
|
MCC on Blind test: 0.68
|
|
|
|
Accuracy on Blind test: 0.84
|
|
|
|
Model_name: QDA
|
|
Model func: QuadraticDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', QuadraticDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.0465939 0.07136464 0.1288743 0.04371285 0.04643869 0.06755185
|
|
0.08158183 0.05100441 0.11657476 0.06610966]
|
|
|
|
mean value: 0.07198069095611573
|
|
|
|
key: score_time
|
|
value: [0.03602362 0.02572703 0.01350999 0.01328588 0.01336932 0.02561569
|
|
0.01333332 0.03729224 0.02285767 0.01297545]
|
|
|
|
mean value: 0.02139902114868164
|
|
|
|
key: test_mcc
|
|
value: [0.31448545 0.19518001 0.04072315 0.29277002 0.33658092 0.22750788
|
|
0.22491669 0.22491669 0.3251513 0.1639348 ]
|
|
|
|
mean value: 0.23461669173564317
|
|
|
|
key: train_mcc
|
|
value: [0.30536468 0.30823376 0.33333333 0.30536468 0.32232919 0.32232919
|
|
0.31129171 0.32254115 0.30467179 0.32159529]
|
|
|
|
mean value: 0.31570547675592
|
|
|
|
key: test_accuracy
|
|
value: [0.61842105 0.55263158 0.51315789 0.57894737 0.61842105 0.56578947
|
|
0.56 0.56 0.6 0.54666667]
|
|
|
|
mean value: 0.5714035087719298
|
|
|
|
key: train_accuracy
|
|
value: [0.58529412 0.58676471 0.6 0.58529412 0.59411765 0.59411765
|
|
0.58883994 0.59471366 0.58443465 0.59324523]
|
|
|
|
mean value: 0.590682171547033
|
|
|
|
key: test_fscore
|
|
value: [0.71287129 0.68518519 0.64761905 0.7037037 0.7184466 0.69158879
|
|
0.68571429 0.68571429 0.71698113 0.68518519]
|
|
|
|
mean value: 0.6933009499314354
|
|
|
|
key: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
train_fscore
|
|
value: [0.70686071 0.70759625 0.71428571 0.70686071 0.71129707 0.71129707
|
|
0.70893971 0.71189979 0.70612669 0.71055381]
|
|
|
|
mean value: 0.7095717525777357
|
|
|
|
key: test_precision
|
|
value: [0.57142857 0.52857143 0.50746269 0.54285714 0.56923077 0.53623188
|
|
0.52941176 0.52941176 0.55882353 0.52857143]
|
|
|
|
mean value: 0.5402000970108005
|
|
|
|
key: train_precision
|
|
value: [0.54662379 0.54750403 0.55555556 0.54662379 0.55194805 0.55194805
|
|
0.54911433 0.55267423 0.54574639 0.55105348]
|
|
|
|
mean value: 0.5498791708555821
|
|
|
|
key: test_recall
|
|
value: [0.94736842 0.97368421 0.89473684 1. 0.97368421 0.97368421
|
|
0.97297297 0.97297297 1. 0.97368421]
|
|
|
|
mean value: 0.9682788051209104
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.61842105 0.55263158 0.51315789 0.57894737 0.61842105 0.56578947
|
|
0.56543385 0.56543385 0.59459459 0.54089616]
|
|
|
|
mean value: 0.5713726884779516
|
|
|
|
key: train_roc_auc
|
|
value: [0.58529412 0.58676471 0.6 0.58529412 0.59411765 0.59411765
|
|
0.58823529 0.59411765 0.58504399 0.59384164]
|
|
|
|
mean value: 0.5906826806969122
|
|
|
|
key: test_jcc
|
|
value: [0.55384615 0.52112676 0.47887324 0.54285714 0.56060606 0.52857143
|
|
0.52173913 0.52173913 0.55882353 0.52112676]
|
|
|
|
mean value: 0.5309309336725496
|
|
|
|
key: train_jcc
|
|
value: [0.54662379 0.54750403 0.55555556 0.54662379 0.55194805 0.55194805
|
|
0.54911433 0.55267423 0.54574639 0.55105348]
|
|
|
|
mean value: 0.5498791708555821
|
|
|
|
MCC on Blind test: 0.06
|
|
|
|
Accuracy on Blind test: 0.47
|
|
|
|
Model_name: Ridge Classifier
|
|
Model func: RidgeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01532841 0.01702619 0.01847219 0.01534581 0.03934741 0.04185438
|
|
0.04375839 0.05411839 0.03648376 0.06336451]
|
|
|
|
mean value: 0.03450994491577149
|
|
|
|
key: score_time
|
|
value: [0.03911352 0.01217103 0.01803064 0.02205896 0.03201556 0.03694797
|
|
0.04308152 0.05942559 0.04073262 0.0300374 ]
|
|
|
|
mean value: 0.03336148262023926
|
|
|
|
key: test_mcc
|
|
value: [0.6599546 0.71077247 0.61580149 0.75373466 0.76342228 0.60715823
|
|
0.61162266 0.61919192 0.7341428 0.8161102 ]
|
|
|
|
mean value: 0.689191130952638
|
|
|
|
key: train_mcc
|
|
value: [0.7831091 0.78778201 0.77951095 0.77993265 0.7803823 0.79092188
|
|
0.79247662 0.77075998 0.77165227 0.76669045]
|
|
|
|
mean value: 0.7803218207970757
|
|
|
|
key: test_accuracy
|
|
value: [0.82894737 0.85526316 0.80263158 0.86842105 0.88157895 0.80263158
|
|
0.8 0.8 0.86666667 0.90666667]
|
|
|
|
mean value: 0.841280701754386
|
|
|
|
key: train_accuracy
|
|
value: [0.88970588 0.89264706 0.88823529 0.88823529 0.88823529 0.89411765
|
|
0.89427313 0.88399413 0.88399413 0.88105727]
|
|
|
|
mean value: 0.8884495119633756
|
|
|
|
key: test_fscore
|
|
value: [0.83544304 0.85333333 0.81927711 0.88095238 0.88 0.81012658
|
|
0.81481481 0.81927711 0.87179487 0.90410959]
|
|
|
|
mean value: 0.8489128827057131
|
|
|
|
key: train_fscore
|
|
value: [0.89481066 0.89674682 0.89295775 0.89325843 0.89355742 0.89830508
|
|
0.89944134 0.88888889 0.88920056 0.88702929]
|
|
|
|
mean value: 0.8934196237269306
|
|
|
|
key: test_precision
|
|
value: [0.80487805 0.86486486 0.75555556 0.80434783 0.89189189 0.7804878
|
|
0.75 0.73913043 0.85 0.94285714]
|
|
|
|
mean value: 0.8184013569697557
|
|
|
|
key: train_precision
|
|
value: [0.85522788 0.86376022 0.85675676 0.85483871 0.85294118 0.86413043
|
|
0.85866667 0.85405405 0.84986595 0.84350133]
|
|
|
|
mean value: 0.8553743176431853
|
|
|
|
key: test_recall
|
|
value: [0.86842105 0.84210526 0.89473684 0.97368421 0.86842105 0.84210526
|
|
0.89189189 0.91891892 0.89473684 0.86842105]
|
|
|
|
mean value: 0.8863442389758179
|
|
|
|
key: train_recall
|
|
value: [0.93823529 0.93235294 0.93235294 0.93529412 0.93823529 0.93529412
|
|
0.94428152 0.92668622 0.93235294 0.93529412]
|
|
|
|
mean value: 0.9350379506641366
|
|
|
|
key: test_roc_auc
|
|
value: [0.82894737 0.85526316 0.80263158 0.86842105 0.88157895 0.80263158
|
|
0.8012091 0.80156472 0.86628734 0.9071835 ]
|
|
|
|
mean value: 0.8415718349928877
|
|
|
|
key: train_roc_auc
|
|
value: [0.88970588 0.89264706 0.88823529 0.88823529 0.88823529 0.89411765
|
|
0.89419959 0.88393134 0.88406503 0.88113679]
|
|
|
|
mean value: 0.8884509228911506
|
|
|
|
key: test_jcc
|
|
value: [0.7173913 0.74418605 0.69387755 0.78723404 0.78571429 0.68085106
|
|
0.6875 0.69387755 0.77272727 0.825 ]
|
|
|
|
mean value: 0.7388359117724808
|
|
|
|
key: train_jcc
|
|
value: [0.80964467 0.81282051 0.80661578 0.8071066 0.80759494 0.81538462
|
|
0.81725888 0.8 0.80050505 0.79699248]
|
|
|
|
mean value: 0.8073923524987736
|
|
|
|
MCC on Blind test: 0.37
|
|
|
|
Accuracy on Blind test: 0.7
|
|
|
|
Model_name: Ridge ClassifierCV
|
|
Model func: RidgeClassifierCV(cv=10)
|
|
List of models: /home/tanu/git/LSHTM_analysis/scripts/ml/./katg_cd_8020.py:136: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
smnc_CT.sort_values(by = ['test_mcc'], ascending = False, inplace = True)
|
|
/home/tanu/git/LSHTM_analysis/scripts/ml/./katg_cd_8020.py:139: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
smnc_BT.sort_values(by = ['bts_mcc'], ascending = False, inplace = True)
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifierCV(cv=10))])
|
|
|
|
key: fit_time
|
|
value: [0.64401865 0.59611702 0.48502159 0.66273427 0.62189841 0.64343715
|
|
0.48427701 0.41426158 0.56429815 0.74364185]
|
|
|
|
mean value: 0.5859705686569214
|
|
|
|
key: score_time
|
|
value: [0.03630757 0.02685738 0.02595925 0.01560163 0.02484679 0.02134538
|
|
0.01717734 0.04061079 0.02449322 0.04164886]
|
|
|
|
mean value: 0.027484822273254394
|
|
|
|
key: test_mcc
|
|
value: [0.6599546 0.71077247 0.58630197 0.75373466 0.76342228 0.60715823
|
|
0.58162979 0.61919192 0.7341428 0.8161102 ]
|
|
|
|
mean value: 0.6832418930232662
|
|
|
|
key: train_mcc
|
|
value: [0.7831091 0.78778201 0.81138597 0.77993265 0.7803823 0.79092188
|
|
0.82028945 0.77075998 0.77165227 0.76669045]
|
|
|
|
mean value: 0.7862906055854441
|
|
|
|
key: test_accuracy
|
|
value: [0.82894737 0.85526316 0.78947368 0.86842105 0.88157895 0.80263158
|
|
0.78666667 0.8 0.86666667 0.90666667]
|
|
|
|
mean value: 0.8386315789473684
|
|
|
|
key: train_accuracy
|
|
value: [0.88970588 0.89264706 0.90441176 0.88823529 0.88823529 0.89411765
|
|
0.90895742 0.88399413 0.88399413 0.88105727]
|
|
|
|
mean value: 0.8915355878034033
|
|
|
|
key: test_fscore
|
|
value: [0.83544304 0.85333333 0.80487805 0.88095238 0.88 0.81012658
|
|
0.8 0.81927711 0.87179487 0.90410959]
|
|
|
|
mean value: 0.8459914952589069
|
|
|
|
key: train_fscore
|
|
value: [0.89481066 0.89674682 0.90806223 0.89325843 0.89355742 0.89830508
|
|
0.91242938 0.88888889 0.88920056 0.88702929]
|
|
|
|
mean value: 0.8962288763334292
|
|
|
|
key: test_precision
|
|
value: [0.80487805 0.86486486 0.75 0.80434783 0.89189189 0.7804878
|
|
0.74418605 0.73913043 0.85 0.94285714]
|
|
|
|
mean value: 0.8172644060653629
|
|
|
|
key: train_precision
|
|
value: [0.85522788 0.86376022 0.8746594 0.85483871 0.85294118 0.86413043
|
|
0.88010899 0.85405405 0.84986595 0.84350133]
|
|
|
|
mean value: 0.8593088145379002
|
|
|
|
key: test_recall
|
|
value: [0.86842105 0.84210526 0.86842105 0.97368421 0.86842105 0.84210526
|
|
0.86486486 0.91891892 0.89473684 0.86842105]
|
|
|
|
mean value: 0.8810099573257468
|
|
|
|
key: train_recall
|
|
value: [0.93823529 0.93235294 0.94411765 0.93529412 0.93823529 0.93529412
|
|
0.94721408 0.92668622 0.93235294 0.93529412]
|
|
|
|
mean value: 0.9365076763843367
|
|
|
|
key: test_roc_auc
|
|
value: [0.82894737 0.85526316 0.78947368 0.86842105 0.88157895 0.80263158
|
|
0.78769559 0.80156472 0.86628734 0.9071835 ]
|
|
|
|
mean value: 0.8389046941678521
|
|
|
|
key: train_roc_auc
|
|
value: [0.88970588 0.89264706 0.90441176 0.88823529 0.88823529 0.89411765
|
|
0.90890116 0.88393134 0.88406503 0.88113679]
|
|
|
|
mean value: 0.8915387269277213
|
|
|
|
key: test_jcc
|
|
value: [0.7173913 0.74418605 0.67346939 0.78723404 0.78571429 0.68085106
|
|
0.66666667 0.69387755 0.77272727 0.825 ]
|
|
|
|
mean value: 0.7347117621126168
|
|
|
|
key: train_jcc
|
|
value: [0.80964467 0.81282051 0.83160622 0.8071066 0.80759494 0.81538462
|
|
0.83896104 0.8 0.80050505 0.79699248]
|
|
|
|
mean value: 0.8120616122235199
|
|
|
|
MCC on Blind test: 0.39
|
|
|
|
Accuracy on Blind test: 0.7
|
|
|
|
Model_name: Logistic Regression
|
|
Model func: LogisticRegression(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegression(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.07275963 0.07579088 0.12557936 0.16045475 0.18968654 0.09939933
|
|
0.15617132 0.25500178 0.09219623 0.07406187]
|
|
|
|
mean value: 0.13011016845703124
|
|
|
|
key: score_time
|
|
value: [0.01302624 0.01517487 0.02265215 0.01664901 0.02367878 0.02577996
|
|
0.01198912 0.01668167 0.01757336 0.01557446]
|
|
|
|
mean value: 0.017877960205078126
|
|
|
|
key: test_mcc
|
|
value: [0.65812266 0.58218174 0.50870557 0.75373466 0.6599546 0.61057165
|
|
0.5488252 0.59621136 0.70676174 0.7875998 ]
|
|
|
|
mean value: 0.6412668999808843
|
|
|
|
key: train_mcc
|
|
value: [0.72786026 0.74788739 0.72454549 0.71825622 0.70687365 0.72738038
|
|
0.73647766 0.71548418 0.72496976 0.71090267]
|
|
|
|
mean value: 0.7240637657844566
|
|
|
|
key: test_accuracy
|
|
value: [0.82894737 0.78947368 0.75 0.86842105 0.82894737 0.80263158
|
|
0.77333333 0.78666667 0.85333333 0.89333333]
|
|
|
|
mean value: 0.8175087719298246
|
|
|
|
key: train_accuracy
|
|
value: [0.86323529 0.87352941 0.86176471 0.85882353 0.85294118 0.86323529
|
|
0.86784141 0.85756241 0.86196769 0.85462555]
|
|
|
|
mean value: 0.8615526474907144
|
|
|
|
key: test_fscore
|
|
value: [0.83116883 0.77777778 0.77108434 0.88095238 0.82191781 0.81481481
|
|
0.77922078 0.80952381 0.85714286 0.8974359 ]
|
|
|
|
mean value: 0.8241039293605724
|
|
|
|
key: train_fscore
|
|
value: [0.86733238 0.87643678 0.86532951 0.86167147 0.85673352 0.86657102
|
|
0.87106017 0.86002886 0.86532951 0.85917496]
|
|
|
|
mean value: 0.8649668198842196
|
|
|
|
key: test_precision
|
|
value: [0.82051282 0.82352941 0.71111111 0.80434783 0.85714286 0.76744186
|
|
0.75 0.72340426 0.84615385 0.875 ]
|
|
|
|
mean value: 0.7978643988556563
|
|
|
|
key: train_precision
|
|
value: [0.84210526 0.85674157 0.84357542 0.84463277 0.83519553 0.84593838
|
|
0.85154062 0.84659091 0.84357542 0.83195592]
|
|
|
|
mean value: 0.844185179682083
|
|
|
|
key: test_recall
|
|
value: [0.84210526 0.73684211 0.84210526 0.97368421 0.78947368 0.86842105
|
|
0.81081081 0.91891892 0.86842105 0.92105263]
|
|
|
|
mean value: 0.8571834992887625
|
|
|
|
key: train_recall
|
|
value: [0.89411765 0.89705882 0.88823529 0.87941176 0.87941176 0.88823529
|
|
0.8914956 0.87390029 0.88823529 0.88823529]
|
|
|
|
mean value: 0.8868337070898741
|
|
|
|
key: test_roc_auc
|
|
value: [0.82894737 0.78947368 0.75 0.86842105 0.82894737 0.80263158
|
|
0.77382646 0.78840683 0.85312945 0.89295875]
|
|
|
|
mean value: 0.817674253200569
|
|
|
|
key: train_roc_auc
|
|
value: [0.86323529 0.87352941 0.86176471 0.85882353 0.85294118 0.86323529
|
|
0.86780662 0.85753838 0.86200621 0.85467483]
|
|
|
|
mean value: 0.8615555459720545
|
|
|
|
key: test_jcc
|
|
value: [0.71111111 0.63636364 0.62745098 0.78723404 0.69767442 0.6875
|
|
0.63829787 0.68 0.75 0.81395349]
|
|
|
|
mean value: 0.7029585549737266
|
|
|
|
key: train_jcc
|
|
value: [0.76574307 0.78005115 0.76262626 0.75696203 0.74937343 0.76455696
|
|
0.7715736 0.75443038 0.76262626 0.75311721]
|
|
|
|
mean value: 0.762106036091155
|
|
|
|
MCC on Blind test: 0.37
|
|
|
|
Accuracy on Blind test: 0.7
|
|
|
|
Model_name: Logistic RegressionCV
|
|
Model func: LogisticRegressionCV(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegressionCV(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [2.37897372 3.04852676 3.86349392 4.12482429 2.46299362 3.25987411
|
|
2.60137796 3.90386128 3.66474605 3.28770685]
|
|
|
|
mean value: 3.2596378564834594
|
|
|
|
key: score_time
|
|
value: [0.01781082 0.03481722 0.0172348 0.01329756 0.02077794 0.02104497
|
|
0.0124588 0.05787635 0.01498485 0.01940107]
|
|
|
|
mean value: 0.02297043800354004
|
|
|
|
key: test_mcc
|
|
value: [0.63157895 0.74095857 0.55436186 0.7228974 0.63245553 0.71675803
|
|
0.52357624 0.72871879 0.7875998 0.8161102 ]
|
|
|
|
mean value: 0.6855015372844508
|
|
|
|
key: train_mcc
|
|
value: [0.86849267 0.83565564 0.84415051 0.82422845 0.82805156 0.84235777
|
|
0.85083195 0.82413609 0.83909325 0.84765527]
|
|
|
|
mean value: 0.8404653146321241
|
|
|
|
key: test_accuracy
|
|
value: [0.81578947 0.86842105 0.77631579 0.85526316 0.81578947 0.85526316
|
|
0.76 0.85333333 0.89333333 0.90666667]
|
|
|
|
mean value: 0.8400175438596491
|
|
|
|
key: train_accuracy
|
|
value: [0.93382353 0.91764706 0.92205882 0.91176471 0.91323529 0.92058824
|
|
0.92511013 0.91189427 0.91923642 0.9236417 ]
|
|
|
|
mean value: 0.9199000172756328
|
|
|
|
key: test_fscore
|
|
value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[0.81578947 0.86111111 0.78481013 0.86746988 0.81081081 0.86419753
|
|
0.76923077 0.86746988 0.8974359 0.90410959]
|
|
|
|
mean value: 0.8442435067796515
|
|
|
|
key: train_fscore
|
|
value: [0.9352518 0.91884058 0.92240117 0.91354467 0.91583452 0.9226361
|
|
0.92661871 0.9132948 0.92063492 0.92463768]
|
|
|
|
mean value: 0.9213694947943573
|
|
|
|
key: test_precision
|
|
value: [0.81578947 0.91176471 0.75609756 0.8 0.83333333 0.81395349
|
|
0.73170732 0.7826087 0.875 0.94285714]
|
|
|
|
mean value: 0.8263111717830087
|
|
|
|
key: train_precision
|
|
value: [0.91549296 0.90571429 0.91836735 0.89548023 0.88919668 0.89944134
|
|
0.90960452 0.9002849 0.90368272 0.91142857]
|
|
|
|
mean value: 0.9048693544104865
|
|
|
|
key: test_recall
|
|
value: [0.81578947 0.81578947 0.81578947 0.94736842 0.78947368 0.92105263
|
|
0.81081081 0.97297297 0.92105263 0.86842105]
|
|
|
|
mean value: 0.8678520625889047
|
|
|
|
key: train_recall
|
|
value: [0.95588235 0.93235294 0.92647059 0.93235294 0.94411765 0.94705882
|
|
0.94428152 0.92668622 0.93823529 0.93823529]
|
|
|
|
mean value: 0.9385673624288425
|
|
|
|
key: test_roc_auc
|
|
value: [0.81578947 0.86842105 0.77631579 0.85526316 0.81578947 0.85526316
|
|
0.76066856 0.85490754 0.89295875 0.9071835 ]
|
|
|
|
mean value: 0.8402560455192034
|
|
|
|
key: train_roc_auc
|
|
value: [0.93382353 0.91764706 0.92205882 0.91176471 0.91323529 0.92058824
|
|
0.92508194 0.91187252 0.91926427 0.9236631 ]
|
|
|
|
mean value: 0.9198999482490944
|
|
|
|
key: test_jcc
|
|
value: [0.68888889 0.75609756 0.64583333 0.76595745 0.68181818 0.76086957
|
|
0.625 0.76595745 0.81395349 0.825 ]
|
|
|
|
mean value: 0.732937591222252
|
|
|
|
key: train_jcc
|
|
value: [0.87837838 0.84986595 0.85597826 0.84084881 0.84473684 0.85638298
|
|
0.86327078 0.84042553 0.85294118 0.85983827]
|
|
|
|
mean value: 0.8542666978983275
|
|
|
|
MCC on Blind test: 0.43
|
|
|
|
Accuracy on Blind test: 0.72
|
|
|
|
Model_name: Gaussian NB
|
|
Model func: GaussianNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianNB())])
|
|
|
|
key: fit_time
|
|
value: [0.03602529 0.01382279 0.01373482 0.01347613 0.01373649 0.0131526
|
|
0.01357841 0.01357794 0.01336479 0.01327491]
|
|
|
|
mean value: 0.01577441692352295
|
|
|
|
key: score_time
|
|
value: [0.01084423 0.01119113 0.0111475 0.0111289 0.01112747 0.01079035
|
|
0.01081443 0.01090288 0.01077151 0.0109241 ]
|
|
|
|
mean value: 0.010964250564575196
|
|
|
|
key: test_mcc
|
|
value: [0.45515762 0.30242157 0.31755367 0.37047929 0.42163702 0.63960215
|
|
0.46657183 0.3349972 0.70697013 0.43985776]
|
|
|
|
mean value: 0.44552482486641504
|
|
|
|
key: train_mcc
|
|
value: [0.44921971 0.4289817 0.5510521 0.48892094 0.50125035 0.4883705
|
|
0.49236773 0.50125246 0.48132576 0.49211419]
|
|
|
|
mean value: 0.48748554437105274
|
|
|
|
key: test_accuracy
|
|
value: [0.72368421 0.64473684 0.65789474 0.68421053 0.71052632 0.81578947
|
|
0.73333333 0.66666667 0.85333333 0.72 ]
|
|
|
|
mean value: 0.7210175438596491
|
|
|
|
key: train_accuracy
|
|
value: [0.71764706 0.70441176 0.775 0.74411765 0.75 0.74411765
|
|
0.74596182 0.75036711 0.74008811 0.74596182]
|
|
|
|
mean value: 0.741767297227261
|
|
|
|
key: test_fscore
|
|
value: [0.69565217 0.68965517 0.675 0.7 0.71794872 0.82926829
|
|
0.72972973 0.67532468 0.85333333 0.72727273]
|
|
|
|
mean value: 0.7293184822618947
|
|
|
|
key: train_fscore
|
|
value: [0.67785235 0.74329502 0.78174037 0.75071633 0.75852273 0.74709302
|
|
0.7517934 0.75644699 0.74822191 0.74891147]
|
|
|
|
mean value: 0.7464593585656054
|
|
|
|
key: test_precision
|
|
value: [0.77419355 0.6122449 0.64285714 0.66666667 0.7 0.77272727
|
|
0.72972973 0.65 0.86486486 0.71794872]
|
|
|
|
mean value: 0.7131232841140676
|
|
|
|
key: train_precision
|
|
value: [0.7890625 0.65688488 0.75900277 0.73184358 0.73351648 0.73850575
|
|
0.73595506 0.7394958 0.72451791 0.73925501]
|
|
|
|
mean value: 0.7348039727153357
|
|
|
|
key: test_recall
|
|
value: [0.63157895 0.78947368 0.71052632 0.73684211 0.73684211 0.89473684
|
|
0.72972973 0.7027027 0.84210526 0.73684211]
|
|
|
|
mean value: 0.7511379800853485
|
|
|
|
key: train_recall
|
|
value: [0.59411765 0.85588235 0.80588235 0.77058824 0.78529412 0.75588235
|
|
0.76832845 0.77419355 0.77352941 0.75882353]
|
|
|
|
mean value: 0.7642521994134898
|
|
|
|
key: test_roc_auc
|
|
value: [0.72368421 0.64473684 0.65789474 0.68421053 0.71052632 0.81578947
|
|
0.73328592 0.66714083 0.85348506 0.7197724 ]
|
|
|
|
mean value: 0.7210526315789474
|
|
|
|
key: train_roc_auc
|
|
value: [0.71764706 0.70441176 0.775 0.74411765 0.75 0.74411765
|
|
0.74592893 0.75033207 0.74013714 0.74598068]
|
|
|
|
mean value: 0.7417672934276349
|
|
|
|
key: test_jcc
|
|
value: [0.53333333 0.52631579 0.50943396 0.53846154 0.56 0.70833333
|
|
0.57446809 0.50980392 0.74418605 0.57142857]
|
|
|
|
mean value: 0.577576458148125
|
|
|
|
key: train_jcc
|
|
value: [0.51269036 0.59146341 0.64168618 0.60091743 0.61098398 0.5962877
|
|
0.60229885 0.60829493 0.59772727 0.59860789]
|
|
|
|
mean value: 0.5960958011344802
|
|
|
|
MCC on Blind test: 0.37
|
|
|
|
Accuracy on Blind test: 0.69
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01359725 0.01359296 0.01358175 0.01369858 0.01353526 0.01356125
|
|
0.01366591 0.01367211 0.01356363 0.01364732]
|
|
|
|
mean value: 0.013611602783203124
|
|
|
|
key: score_time
|
|
value: [0.01087713 0.01089525 0.0107882 0.0108788 0.01078677 0.01087093
|
|
0.01081085 0.01085949 0.010952 0.01081491]
|
|
|
|
mean value: 0.010853433609008789
|
|
|
|
key: test_mcc
|
|
value: [0.52704628 0.36893239 0.37310125 0.37686733 0.26462806 0.58218174
|
|
0.33740121 0.3349972 0.33309617 0.62660028]
|
|
|
|
mean value: 0.41248519170310405
|
|
|
|
key: train_mcc
|
|
value: [0.53430003 0.52557935 0.51304448 0.52531378 0.54048656 0.48732394
|
|
0.52176681 0.50112681 0.53073607 0.49660188]
|
|
|
|
mean value: 0.5176279715359968
|
|
|
|
key: test_accuracy
|
|
value: [0.76315789 0.68421053 0.68421053 0.68421053 0.63157895 0.78947368
|
|
0.66666667 0.66666667 0.66666667 0.81333333]
|
|
|
|
mean value: 0.7050175438596491
|
|
|
|
key: train_accuracy
|
|
value: [0.76617647 0.76176471 0.75588235 0.76176471 0.76911765 0.74264706
|
|
0.76064611 0.75036711 0.76358297 0.74743025]
|
|
|
|
mean value: 0.7579379372894532
|
|
|
|
key: test_fscore
|
|
value: [0.76923077 0.69230769 0.70731707 0.71428571 0.65 0.77777778
|
|
0.6835443 0.67532468 0.67532468 0.81578947]
|
|
|
|
mean value: 0.7160902154903714
|
|
|
|
key: train_fscore
|
|
value: [0.77574048 0.77183099 0.76420455 0.77118644 0.77918425 0.75386779
|
|
0.7661406 0.75574713 0.77607789 0.75706215]
|
|
|
|
mean value: 0.7671042252842464
|
|
|
|
key: test_precision
|
|
value: [0.75 0.675 0.65909091 0.65217391 0.61904762 0.82352941
|
|
0.64285714 0.65 0.66666667 0.81578947]
|
|
|
|
mean value: 0.6954155136154733
|
|
|
|
key: train_precision
|
|
value: [0.74525745 0.74054054 0.73901099 0.74184783 0.74663073 0.72237197
|
|
0.75 0.74084507 0.73614776 0.72826087]
|
|
|
|
mean value: 0.7390913200874492
|
|
|
|
key: test_recall
|
|
value: [0.78947368 0.71052632 0.76315789 0.78947368 0.68421053 0.73684211
|
|
0.72972973 0.7027027 0.68421053 0.81578947]
|
|
|
|
mean value: 0.7406116642958749
|
|
|
|
key: train_recall
|
|
value: [0.80882353 0.80588235 0.79117647 0.80294118 0.81470588 0.78823529
|
|
0.7829912 0.771261 0.82058824 0.78823529]
|
|
|
|
mean value: 0.7974840434707607
|
|
|
|
key: test_roc_auc
|
|
value: [0.76315789 0.68421053 0.68421053 0.68421053 0.63157895 0.78947368
|
|
0.66749644 0.66714083 0.66642959 0.81330014]
|
|
|
|
mean value: 0.7051209103840683
|
|
|
|
key: train_roc_auc
|
|
value: [0.76617647 0.76176471 0.75588235 0.76176471 0.76911765 0.74264706
|
|
0.76061325 0.75033638 0.76366655 0.74749008]
|
|
|
|
mean value: 0.7579459203036053
|
|
|
|
key: test_jcc
|
|
value: [0.625 0.52941176 0.54716981 0.55555556 0.48148148 0.63636364
|
|
0.51923077 0.50980392 0.50980392 0.68888889]
|
|
|
|
mean value: 0.5602709750684224
|
|
|
|
key: train_jcc
|
|
value: [0.63364055 0.62844037 0.6183908 0.62758621 0.63824885 0.60496614
|
|
0.62093023 0.6073903 0.63409091 0.60909091]
|
|
|
|
mean value: 0.6222775270314147
|
|
|
|
MCC on Blind test: 0.39
|
|
|
|
Accuracy on Blind test: 0.7
|
|
|
|
Model_name: K-Nearest Neighbors
|
|
Model func: KNeighborsClassifier()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', KNeighborsClassifier())])
|
|
|
|
key: fit_time
|
|
value: [0.0128057 0.01217151 0.01274586 0.0126667 0.01253891 0.01253581
|
|
0.0127933 0.01320362 0.01242256 0.01235414]
|
|
|
|
mean value: 0.012623810768127441
|
|
|
|
key: score_time
|
|
value: [0.04706025 0.02677512 0.02521372 0.02637076 0.02574992 0.02654791
|
|
0.03812981 0.02444649 0.02515459 0.02471375]
|
|
|
|
mean value: 0.02901623249053955
|
|
|
|
key: test_mcc
|
|
value: [0.57894737 0.34222378 0.46737879 0.34317639 0.42105263 0.47434165
|
|
0.44071833 0.17295378 0.36286048 0.3064868 ]
|
|
|
|
mean value: 0.39101400016139753
|
|
|
|
key: train_mcc
|
|
value: [0.62708115 0.62968418 0.63923419 0.62285804 0.64559651 0.64252296
|
|
0.66655877 0.63586647 0.6151091 0.629042 ]
|
|
|
|
mean value: 0.6353553364154553
|
|
|
|
key: test_accuracy
|
|
value: [0.78947368 0.67105263 0.72368421 0.67105263 0.71052632 0.73684211
|
|
0.72 0.58666667 0.68 0.65333333]
|
|
|
|
mean value: 0.6942631578947368
|
|
|
|
key: train_accuracy
|
|
value: [0.81323529 0.81470588 0.81911765 0.81029412 0.82205882 0.82058824
|
|
0.83259912 0.81791483 0.8061674 0.81350954]
|
|
|
|
mean value: 0.8170190895741557
|
|
|
|
key: test_fscore
|
|
value: [0.78947368 0.67532468 0.75862069 0.6835443 0.71052632 0.74358974
|
|
0.70422535 0.57534247 0.66666667 0.66666667]
|
|
|
|
mean value: 0.6973980563566494
|
|
|
|
key: train_fscore
|
|
value: [0.81726619 0.8173913 0.82403433 0.8180536 0.82788051 0.82621083
|
|
0.83806818 0.81924198 0.81460674 0.82036775]
|
|
|
|
mean value: 0.8223121418038274
|
|
|
|
key: test_precision
|
|
value: [0.78947368 0.66666667 0.67346939 0.65853659 0.71052632 0.725
|
|
0.73529412 0.58333333 0.70588235 0.65 ]
|
|
|
|
mean value: 0.6898182443709191
|
|
|
|
key: train_precision
|
|
value: [0.8 0.80571429 0.80222841 0.78590786 0.80165289 0.80110497
|
|
0.81267218 0.81449275 0.77956989 0.79019074]
|
|
|
|
mean value: 0.7993533980086487
|
|
|
|
key: test_recall
|
|
value: [0.78947368 0.68421053 0.86842105 0.71052632 0.71052632 0.76315789
|
|
0.67567568 0.56756757 0.63157895 0.68421053]
|
|
|
|
mean value: 0.7085348506401138
|
|
|
|
key: train_recall
|
|
value: [0.83529412 0.82941176 0.84705882 0.85294118 0.85588235 0.85294118
|
|
0.86510264 0.82404692 0.85294118 0.85294118]
|
|
|
|
mean value: 0.8468561324823184
|
|
|
|
key: test_roc_auc
|
|
value: [0.78947368 0.67105263 0.72368421 0.67105263 0.71052632 0.73684211
|
|
0.71941679 0.58641536 0.68065434 0.65291607]
|
|
|
|
mean value: 0.6942034139402561
|
|
|
|
key: train_roc_auc
|
|
value: [0.81323529 0.81470588 0.81911765 0.81029412 0.82205882 0.82058824
|
|
0.83255132 0.81790581 0.80623598 0.81356736]
|
|
|
|
mean value: 0.8170260479558393
|
|
|
|
key: test_jcc
|
|
value: [0.65217391 0.50980392 0.61111111 0.51923077 0.55102041 0.59183673
|
|
0.54347826 0.40384615 0.5 0.5 ]
|
|
|
|
mean value: 0.5382501272526848
|
|
|
|
key: train_jcc
|
|
value: [0.69099757 0.69117647 0.70072993 0.69212411 0.70631068 0.7038835
|
|
0.72127139 0.69382716 0.68720379 0.69544365]
|
|
|
|
mean value: 0.6982968234964712
|
|
|
|
MCC on Blind test: 0.17
|
|
|
|
Accuracy on Blind test: 0.6
|
|
|
|
Model_name: SVM
|
|
Model func: SVC(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SVC(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.05335617 0.04748607 0.04872799 0.04849339 0.04869008 0.04856324
|
|
0.04872131 0.04892969 0.0491817 0.04905438]
|
|
|
|
mean value: 0.049120402336120604
|
|
|
|
key: score_time
|
|
value: [0.0192523 0.0192728 0.02065706 0.01959968 0.01960993 0.01966596
|
|
0.01975584 0.01971316 0.0200603 0.01953459]
|
|
|
|
mean value: 0.019712162017822266
|
|
|
|
key: test_mcc
|
|
value: [0.60547285 0.52925612 0.50870557 0.65465367 0.52704628 0.55282303
|
|
0.4953682 0.4164324 0.7341428 0.65500602]
|
|
|
|
mean value: 0.5678906943419609
|
|
|
|
key: train_mcc
|
|
value: [0.68539535 0.70122333 0.7113305 0.67987852 0.67576397 0.72217168
|
|
0.68752613 0.66431362 0.68221133 0.67582777]
|
|
|
|
mean value: 0.6885642200000757
|
|
|
|
key: test_accuracy
|
|
value: [0.80263158 0.76315789 0.75 0.81578947 0.76315789 0.77631579
|
|
0.74666667 0.70666667 0.86666667 0.82666667]
|
|
|
|
mean value: 0.7817719298245615
|
|
|
|
key: train_accuracy
|
|
value: [0.84117647 0.84852941 0.85294118 0.83823529 0.83529412 0.85882353
|
|
0.84140969 0.83113069 0.83847283 0.83406755]
|
|
|
|
mean value: 0.8420080763582967
|
|
|
|
key: test_fscore
|
|
value: [0.80519481 0.775 0.77108434 0.8372093 0.76923077 0.77922078
|
|
0.75324675 0.71794872 0.87179487 0.83544304]
|
|
|
|
mean value: 0.791537337428636
|
|
|
|
key: train_fscore
|
|
value: [0.84831461 0.85634589 0.86149584 0.84593838 0.84487535 0.86629526
|
|
0.85041551 0.83779972 0.84764543 0.84541724]
|
|
|
|
mean value: 0.8504543219888906
|
|
|
|
key: test_precision
|
|
value: [0.79487179 0.73809524 0.71111111 0.75 0.75 0.76923077
|
|
0.725 0.68292683 0.85 0.80487805]
|
|
|
|
mean value: 0.7576113791357694
|
|
|
|
key: train_precision
|
|
value: [0.81182796 0.81432361 0.81413613 0.80748663 0.79842932 0.82275132
|
|
0.80577428 0.80706522 0.80104712 0.79028133]
|
|
|
|
mean value: 0.8073122909158496
|
|
|
|
key: test_recall
|
|
value: [0.81578947 0.81578947 0.84210526 0.94736842 0.78947368 0.78947368
|
|
0.78378378 0.75675676 0.89473684 0.86842105]
|
|
|
|
mean value: 0.8303698435277382
|
|
|
|
key: train_recall
|
|
value: [0.88823529 0.90294118 0.91470588 0.88823529 0.89705882 0.91470588
|
|
0.90029326 0.87096774 0.9 0.90882353]
|
|
|
|
mean value: 0.898596687942039
|
|
|
|
key: test_roc_auc
|
|
value: [0.80263158 0.76315789 0.75 0.81578947 0.76315789 0.77631579
|
|
0.74715505 0.70732575 0.86628734 0.82610242]
|
|
|
|
mean value: 0.7817923186344239
|
|
|
|
key: train_roc_auc
|
|
value: [0.84117647 0.84852941 0.85294118 0.83823529 0.83529412 0.85882353
|
|
0.8413231 0.83107211 0.83856305 0.83417716]
|
|
|
|
mean value: 0.842013541486976
|
|
|
|
key: test_jcc
|
|
value: [0.67391304 0.63265306 0.62745098 0.72 0.625 0.63829787
|
|
0.60416667 0.56 0.77272727 0.7173913 ]
|
|
|
|
mean value: 0.6571600201177099
|
|
|
|
key: train_jcc
|
|
value: [0.73658537 0.74878049 0.756691 0.73300971 0.73141487 0.76412776
|
|
0.73975904 0.72087379 0.73557692 0.73222749]
|
|
|
|
mean value: 0.7399046425977518
|
|
|
|
MCC on Blind test: 0.33
|
|
|
|
Accuracy on Blind test: 0.67
|
|
|
|
Model_name: MLP
|
|
Model func: MLPClassifier(max_iter=500, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MLPClassifier(max_iter=500, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [7.1758182 5.87345672 6.46495247 3.48258066 4.64701009 2.46476197
|
|
3.94435692 3.10128665 3.98406339 2.64380097]
|
|
|
|
mean value: 4.378208804130554
|
|
|
|
key: score_time
|
|
value: [0.0241847 0.02087307 0.02701283 0.02011752 0.01293445 0.01277781
|
|
0.01933622 0.01277947 0.01566601 0.01325059]
|
|
|
|
mean value: 0.017893266677856446
|
|
|
|
key: test_mcc
|
|
value: [0.63245553 0.55747847 0.63245553 0.76376262 0.68421053 0.58218174
|
|
0.57714816 0.5733843 0.65975905 0.70676174]
|
|
|
|
mean value: 0.6369597662687557
|
|
|
|
key: train_mcc
|
|
value: [0.9206878 0.92059222 0.91474149 0.87082049 0.92148545 0.92455371
|
|
0.91068292 0.88022367 0.94420342 0.88327194]
|
|
|
|
mean value: 0.9091263086203635
|
|
|
|
key: test_accuracy
|
|
value: [0.81578947 0.77631579 0.81578947 0.86842105 0.84210526 0.78947368
|
|
0.78666667 0.77333333 0.82666667 0.85333333]
|
|
|
|
mean value: 0.8147894736842105
|
|
|
|
key: train_accuracy
|
|
value: [0.96029412 0.96029412 0.95735294 0.93382353 0.96029412 0.96176471
|
|
0.95447871 0.93979442 0.97209985 0.93979442]
|
|
|
|
mean value: 0.9539990930292822
|
|
|
|
key: test_fscore
|
|
value: [0.81081081 0.76056338 0.82051282 0.88372093 0.84210526 0.8
|
|
0.79487179 0.8 0.81690141 0.85714286]
|
|
|
|
mean value: 0.8186629265461131
|
|
|
|
key: train_fscore
|
|
value: [0.96 0.96023564 0.95716396 0.93653032 0.96115108 0.96264368
|
|
0.95590327 0.94100719 0.97201767 0.94233474]
|
|
|
|
mean value: 0.9548987559776294
|
|
|
|
key: test_precision
|
|
value: [0.83333333 0.81818182 0.8 0.79166667 0.84210526 0.76190476
|
|
0.75609756 0.70833333 0.87878788 0.84615385]
|
|
|
|
mean value: 0.8036564462495143
|
|
|
|
key: train_precision
|
|
value: [0.96716418 0.96165192 0.96142433 0.899729 0.94084507 0.94101124
|
|
0.9281768 0.92372881 0.97345133 0.90296496]
|
|
|
|
mean value: 0.9400147628662179
|
|
|
|
key: test_recall
|
|
value: [0.78947368 0.71052632 0.84210526 1. 0.84210526 0.84210526
|
|
0.83783784 0.91891892 0.76315789 0.86842105]
|
|
|
|
mean value: 0.8414651493598861
|
|
|
|
key: train_recall
|
|
value: [0.95294118 0.95882353 0.95294118 0.97647059 0.98235294 0.98529412
|
|
0.98533724 0.95894428 0.97058824 0.98529412]
|
|
|
|
mean value: 0.9708987407279628
|
|
|
|
key: test_roc_auc
|
|
value: [0.81578947 0.77631579 0.81578947 0.86842105 0.84210526 0.78947368
|
|
0.78733997 0.77524893 0.82752489 0.85312945]
|
|
|
|
mean value: 0.8151137980085348
|
|
|
|
key: train_roc_auc
|
|
value: [0.96029412 0.96029412 0.95735294 0.93382353 0.96029412 0.96176471
|
|
0.95443333 0.93976626 0.97209764 0.93986114]
|
|
|
|
mean value: 0.9539981887183027
|
|
|
|
key: test_jcc
|
|
value: [0.68181818 0.61363636 0.69565217 0.79166667 0.72727273 0.66666667
|
|
0.65957447 0.66666667 0.69047619 0.75 ]
|
|
|
|
mean value: 0.6943430105201613
|
|
|
|
key: train_jcc
|
|
value: [0.92307692 0.92351275 0.91784703 0.8806366 0.92520776 0.92797784
|
|
0.91553134 0.88858696 0.94555874 0.89095745]
|
|
|
|
mean value: 0.9138893374525558
|
|
|
|
MCC on Blind test: 0.39
|
|
|
|
Accuracy on Blind test: 0.7
|
|
|
|
Model_name: Decision Tree
|
|
Model func: DecisionTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', DecisionTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.06166363 0.05503798 0.09528446 0.1059835 0.09725285 0.10320044
|
|
0.10138893 0.08779979 0.06832457 0.09928083]
|
|
|
|
mean value: 0.08752169609069824
|
|
|
|
key: score_time
|
|
value: [0.0130868 0.01302743 0.02460408 0.02283335 0.02892256 0.03161383
|
|
0.01267195 0.01292849 0.02644396 0.02918243]
|
|
|
|
mean value: 0.02153148651123047
|
|
|
|
key: test_mcc
|
|
value: [0.65812266 0.60547285 0.76554733 0.84327404 0.89473684 0.68516016
|
|
0.79731451 0.82825406 0.8161102 0.76214986]
|
|
|
|
mean value: 0.7656142511920719
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.82894737 0.80263158 0.88157895 0.92105263 0.94736842 0.84210526
|
|
0.89333333 0.90666667 0.90666667 0.88 ]
|
|
|
|
mean value: 0.8810350877192983
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.83116883 0.8 0.88607595 0.92307692 0.94736842 0.84615385
|
|
0.9 0.91358025 0.90410959 0.88607595]
|
|
|
|
mean value: 0.8837609756141086
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.82051282 0.81081081 0.85365854 0.9 0.94736842 0.825
|
|
0.8372093 0.84090909 0.94285714 0.85365854]
|
|
|
|
mean value: 0.863198466163881
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.84210526 0.78947368 0.92105263 0.94736842 0.94736842 0.86842105
|
|
0.97297297 1. 0.86842105 0.92105263]
|
|
|
|
mean value: 0.907823613086771
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.82894737 0.80263158 0.88157895 0.92105263 0.94736842 0.84210526
|
|
0.89438122 0.90789474 0.9071835 0.87944523]
|
|
|
|
mean value: 0.8812588904694167
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.71111111 0.66666667 0.79545455 0.85714286 0.9 0.73333333
|
|
0.81818182 0.84090909 0.825 0.79545455]
|
|
|
|
mean value: 0.7943253968253968
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.51
|
|
|
|
Accuracy on Blind test: 0.76
|
|
|
|
Model_name: Extra Trees
|
|
Model func: ExtraTreesClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreesClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.37604213 0.48544645 0.43626404 0.46652532 0.44118333 0.49089432
|
|
0.43220329 0.22220016 0.22277522 0.22343111]
|
|
|
|
mean value: 0.37969653606414794
|
|
|
|
key: score_time
|
|
value: [0.04538631 0.04444051 0.05273294 0.0486722 0.04547024 0.05108738
|
|
0.02474356 0.02525091 0.02526355 0.02541089]
|
|
|
|
mean value: 0.03884584903717041
|
|
|
|
key: test_mcc
|
|
value: [0.63510735 0.55436186 0.50017322 0.66366484 0.63510735 0.60547285
|
|
0.65362731 0.52770861 0.684292 0.7341428 ]
|
|
|
|
mean value: 0.6193658193714568
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.81578947 0.77631579 0.75 0.82894737 0.81578947 0.80263158
|
|
0.82666667 0.76 0.84 0.86666667]
|
|
|
|
mean value: 0.8082807017543859
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.80555556 0.76712329 0.74666667 0.83950617 0.80555556 0.80519481
|
|
0.82666667 0.775 0.83333333 0.87179487]
|
|
|
|
mean value: 0.8076396915278193
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.85294118 0.8 0.75675676 0.79069767 0.85294118 0.79487179
|
|
0.81578947 0.72093023 0.88235294 0.85 ]
|
|
|
|
mean value: 0.8117281226407154
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.76315789 0.73684211 0.73684211 0.89473684 0.76315789 0.81578947
|
|
0.83783784 0.83783784 0.78947368 0.89473684]
|
|
|
|
mean value: 0.8070412517780938
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.81578947 0.77631579 0.75 0.82894737 0.81578947 0.80263158
|
|
0.82681366 0.76102418 0.84068279 0.86628734]
|
|
|
|
mean value: 0.8084281650071125
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.6744186 0.62222222 0.59574468 0.72340426 0.6744186 0.67391304
|
|
0.70454545 0.63265306 0.71428571 0.77272727]
|
|
|
|
mean value: 0.6788332913955952
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.38
|
|
|
|
Accuracy on Blind test: 0.7
|
|
|
|
Model_name: Extra Tree
|
|
Model func: ExtraTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01618338 0.01619029 0.01625562 0.0160234 0.01612997 0.01603913
|
|
0.01632571 0.01601529 0.01641369 0.01617646]
|
|
|
|
mean value: 0.016175293922424318
|
|
|
|
key: score_time
|
|
value: [0.01259589 0.01263499 0.01265812 0.01247358 0.01253796 0.01258564
|
|
0.01242828 0.01261926 0.01248264 0.01243114]
|
|
|
|
mean value: 0.012544751167297363
|
|
|
|
key: test_mcc
|
|
value: [0.58218174 0.36893239 0.31755367 0.47970161 0.52631579 0.42163702
|
|
0.3408334 0.56442848 0.30654339 0.62967232]
|
|
|
|
mean value: 0.4537799812761454
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.78947368 0.68421053 0.65789474 0.73684211 0.76315789 0.71052632
|
|
0.66666667 0.77333333 0.65333333 0.81333333]
|
|
|
|
mean value: 0.7248771929824561
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.8 0.69230769 0.675 0.75609756 0.76315789 0.71794872
|
|
0.69135802 0.79518072 0.65789474 0.825 ]
|
|
|
|
mean value: 0.7373945350393891
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.76190476 0.675 0.64285714 0.70454545 0.76315789 0.7
|
|
0.63636364 0.7173913 0.65789474 0.78571429]
|
|
|
|
mean value: 0.7044829217312055
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.84210526 0.71052632 0.71052632 0.81578947 0.76315789 0.73684211
|
|
0.75675676 0.89189189 0.65789474 0.86842105]
|
|
|
|
mean value: 0.7753911806543385
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.78947368 0.68421053 0.65789474 0.73684211 0.76315789 0.71052632
|
|
0.66785206 0.77489331 0.65327169 0.8125889 ]
|
|
|
|
mean value: 0.7250711237553343
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.66666667 0.52941176 0.50943396 0.60784314 0.61702128 0.56
|
|
0.52830189 0.66 0.49019608 0.70212766]
|
|
|
|
mean value: 0.587100243228564
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.17
|
|
|
|
Accuracy on Blind test: 0.59
|
|
|
|
Model_name: Random Forest
|
|
Model func: RandomForestClassifier(n_estimators=1000, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(n_estimators=1000, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [3.63583255 3.80037642 3.32375193 3.1128571 3.57785368 3.45375061
|
|
3.6680789 3.59620523 2.71234298 2.51360083]
|
|
|
|
mean value: 3.339465022087097
|
|
|
|
key: score_time
|
|
value: [0.14846182 0.13906503 0.10990095 0.19633579 0.13120627 0.13332629
|
|
0.13334417 0.16465759 0.09881902 0.10249853]
|
|
|
|
mean value: 0.13576154708862304
|
|
|
|
key: test_mcc
|
|
value: [0.78947368 0.73786479 0.87603759 0.84327404 0.86872191 0.76554733
|
|
0.82825406 0.79731451 0.79143584 0.81365576]
|
|
|
|
mean value: 0.8111579516880107
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.89473684 0.86842105 0.93421053 0.92105263 0.93421053 0.88157895
|
|
0.90666667 0.89333333 0.89333333 0.90666667]
|
|
|
|
mean value: 0.9034210526315789
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.89473684 0.87179487 0.9382716 0.92307692 0.93506494 0.88607595
|
|
0.91358025 0.9 0.88888889 0.90666667]
|
|
|
|
mean value: 0.905815692881649
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.89473684 0.85 0.88372093 0.9 0.92307692 0.85365854
|
|
0.84090909 0.8372093 0.94117647 0.91891892]
|
|
|
|
mean value: 0.8843407014741937
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.89473684 0.89473684 1. 0.94736842 0.94736842 0.92105263
|
|
1. 0.97297297 0.84210526 0.89473684]
|
|
|
|
mean value: 0.9315078236130867
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.89473684 0.86842105 0.93421053 0.92105263 0.93421053 0.88157895
|
|
0.90789474 0.89438122 0.8940256 0.90682788]
|
|
|
|
mean value: 0.9037339971550498
|
|
|
|
key: train_roc_auc
|
|
value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.80952381 0.77272727 0.88372093 0.85714286 0.87804878 0.79545455
|
|
0.84090909 0.81818182 0.8 0.82926829]
|
|
|
|
mean value: 0.8284977397342683
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.59
|
|
|
|
Accuracy on Blind test: 0.8
|
|
|
|
Model_name: Random Forest2
|
|
Model func: RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000...05', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10,
|
|
oob_score=True, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.0864861 1.05055857 1.04278326 1.08012772 1.05407476 1.08882928
|
|
1.05638576 1.03551531 1.16890883 1.05544353]
|
|
|
|
mean value: 1.0719113111495973
|
|
|
|
key: score_time
|
|
value: [0.19623375 0.23241067 0.12970495 0.21930242 0.18969011 0.1542902
|
|
0.21838427 0.20401025 0.12365365 0.20367074]
|
|
|
|
mean value: 0.18713510036468506
|
|
|
|
key: test_mcc
|
|
value: [0.78947368 0.73786479 0.85280287 0.84327404 0.86872191 0.74095857
|
|
0.82093797 0.76721166 0.79143584 0.81352334]
|
|
|
|
mean value: 0.8026204669114924
|
|
|
|
key: train_mcc
|
|
value: [0.92737353 0.92679534 0.92714855 0.92148545 0.91518498 0.9358772
|
|
0.92705885 0.92976951 0.9272585 0.91878827]
|
|
|
|
mean value: 0.9256740178772753
|
|
|
|
key: test_accuracy
|
|
value: [0.89473684 0.86842105 0.92105263 0.92105263 0.93421053 0.86842105
|
|
0.90666667 0.88 0.89333333 0.90666667]
|
|
|
|
mean value: 0.8994561403508772
|
|
|
|
key: train_accuracy
|
|
value: [0.96323529 0.96323529 0.96323529 0.96029412 0.95735294 0.96764706
|
|
0.96328928 0.96475771 0.96328928 0.95888399]
|
|
|
|
mean value: 0.962522026431718
|
|
|
|
key: test_fscore
|
|
value: [0.89473684 0.87179487 0.92682927 0.92307692 0.93333333 0.875
|
|
0.91139241 0.88607595 0.88888889 0.90909091]
|
|
|
|
mean value: 0.9020219391013252
|
|
|
|
key: train_fscore
|
|
value: [0.96402878 0.96371553 0.96392496 0.96115108 0.95803184 0.96820809
|
|
0.96392496 0.96521739 0.96392496 0.95977011]
|
|
|
|
mean value: 0.9631897714291754
|
|
|
|
key: test_precision
|
|
value: [0.89473684 0.85 0.86363636 0.9 0.94594595 0.83333333
|
|
0.85714286 0.83333333 0.94117647 0.8974359 ]
|
|
|
|
mean value: 0.8816741043521229
|
|
|
|
key: train_precision
|
|
value: [0.94366197 0.9512894 0.94617564 0.94084507 0.94301994 0.95170455
|
|
0.94886364 0.95415473 0.94617564 0.93820225]
|
|
|
|
mean value: 0.9464092815144691
|
|
|
|
key: test_recall
|
|
value: [0.89473684 0.89473684 1. 0.94736842 0.92105263 0.92105263
|
|
0.97297297 0.94594595 0.84210526 0.92105263]
|
|
|
|
mean value: 0.9261024182076814
|
|
|
|
key: train_recall
|
|
value: [0.98529412 0.97647059 0.98235294 0.98235294 0.97352941 0.98529412
|
|
0.97947214 0.97653959 0.98235294 0.98235294]
|
|
|
|
mean value: 0.9806011730205278
|
|
|
|
key: test_roc_auc
|
|
value: [0.89473684 0.86842105 0.92105263 0.92105263 0.93421053 0.86842105
|
|
0.90753912 0.88086771 0.8940256 0.90647226]
|
|
|
|
mean value: 0.8996799431009957
|
|
|
|
key: train_roc_auc
|
|
value: [0.96323529 0.96323529 0.96323529 0.96029412 0.95735294 0.96764706
|
|
0.96326548 0.96474038 0.96331723 0.95891841]
|
|
|
|
mean value: 0.9625241504226324
|
|
|
|
key: test_jcc
|
|
value: [0.80952381 0.77272727 0.86363636 0.85714286 0.875 0.77777778
|
|
0.8372093 0.79545455 0.8 0.83333333]
|
|
|
|
mean value: 0.8221805261921541
|
|
|
|
key: train_jcc
|
|
value: [0.93055556 0.92997199 0.93036212 0.92520776 0.91944444 0.93837535
|
|
0.93036212 0.93277311 0.93036212 0.92265193]
|
|
|
|
mean value: 0.9290066489088546
|
|
|
|
MCC on Blind test: 0.66
|
|
|
|
Accuracy on Blind test: 0.84
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01105309 0.01093221 0.01110411 0.01116133 0.01123071 0.01129651
|
|
0.01142764 0.01346993 0.01300025 0.01421642]
|
|
|
|
mean value: 0.011889219284057617
|
|
|
|
key: score_time
|
|
value: [0.00909066 0.00912118 0.00927067 0.00918627 0.00927615 0.00932002
|
|
0.00940156 0.0108161 0.01103806 0.01135945]
|
|
|
|
mean value: 0.009788012504577637
|
|
|
|
key: test_mcc
|
|
value: [0.52704628 0.36893239 0.37310125 0.37686733 0.26462806 0.58218174
|
|
0.33740121 0.3349972 0.33309617 0.62660028]
|
|
|
|
mean value: 0.41248519170310405
|
|
|
|
key: train_mcc
|
|
value: [0.53430003 0.52557935 0.51304448 0.52531378 0.54048656 0.48732394
|
|
0.52176681 0.50112681 0.53073607 0.49660188]
|
|
|
|
mean value: 0.5176279715359968
|
|
|
|
key: test_accuracy
|
|
value: [0.76315789 0.68421053 0.68421053 0.68421053 0.63157895 0.78947368
|
|
0.66666667 0.66666667 0.66666667 0.81333333]
|
|
|
|
mean value: 0.7050175438596491
|
|
|
|
key: train_accuracy
|
|
value: [0.76617647 0.76176471 0.75588235 0.76176471 0.76911765 0.74264706
|
|
0.76064611 0.75036711 0.76358297 0.74743025]
|
|
|
|
mean value: 0.7579379372894532
|
|
|
|
key: test_fscore
|
|
value: [0.76923077 0.69230769 0.70731707 0.71428571 0.65 0.77777778
|
|
0.6835443 0.67532468 0.67532468 0.81578947]
|
|
|
|
mean value: 0.7160902154903714
|
|
|
|
key: train_fscore
|
|
value: [0.77574048 0.77183099 0.76420455 0.77118644 0.77918425 0.75386779
|
|
0.7661406 0.75574713 0.77607789 0.75706215]
|
|
|
|
mean value: 0.7671042252842464
|
|
|
|
key: test_precision
|
|
value: [0.75 0.675 0.65909091 0.65217391 0.61904762 0.82352941
|
|
0.64285714 0.65 0.66666667 0.81578947]
|
|
|
|
mean value: 0.6954155136154733
|
|
|
|
key: train_precision
|
|
value: [0.74525745 0.74054054 0.73901099 0.74184783 0.74663073 0.72237197
|
|
0.75 0.74084507 0.73614776 0.72826087]
|
|
|
|
mean value: 0.7390913200874492
|
|
|
|
key: test_recall
|
|
value: [0.78947368 0.71052632 0.76315789 0.78947368 0.68421053 0.73684211
|
|
0.72972973 0.7027027 0.68421053 0.81578947]
|
|
|
|
mean value: 0.7406116642958749
|
|
|
|
key: train_recall
|
|
value: [0.80882353 0.80588235 0.79117647 0.80294118 0.81470588 0.78823529
|
|
0.7829912 0.771261 0.82058824 0.78823529]
|
|
|
|
mean value: 0.7974840434707607
|
|
|
|
key: test_roc_auc
|
|
value: [0.76315789 0.68421053 0.68421053 0.68421053 0.63157895 0.78947368
|
|
0.66749644 0.66714083 0.66642959 0.81330014]
|
|
|
|
mean value: 0.7051209103840683
|
|
|
|
key: train_roc_auc
|
|
value: [0.76617647 0.76176471 0.75588235 0.76176471 0.76911765 0.74264706
|
|
0.76061325 0.75033638 0.76366655 0.74749008]
|
|
|
|
mean value: 0.7579459203036053
|
|
|
|
key: test_jcc
|
|
value: [0.625 0.52941176 0.54716981 0.55555556 0.48148148 0.63636364
|
|
0.51923077 0.50980392 0.50980392 0.68888889]
|
|
|
|
mean value: 0.5602709750684224
|
|
|
|
key: train_jcc
|
|
value: [0.63364055 0.62844037 0.6183908 0.62758621 0.63824885 0.60496614
|
|
0.62093023 0.6073903 0.63409091 0.60909091]
|
|
|
|
mean value: 0.6222775270314147
|
|
|
|
MCC on Blind test: 0.39
|
|
|
|
Accuracy on Blind test: 0.7
|
|
|
|
Model_name: XGBoost
|
|
Model func: XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000...
|
|
interaction_constraints=None, learning_rate=None,
|
|
max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan,
|
|
monotone_constraints=None, n_estimators=100,
|
|
n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None,
|
|
reg_lambda=None, scale_pos_weight=None,
|
|
subsample=None, tree_method=None,
|
|
use_label_encoder=False,
|
|
validate_parameters=None, verbosity=0))])
|
|
|
|
key: fit_time
|
|
value: [1.44187474 1.51880646 1.45888186 1.48014545 1.55064511 1.47095942
|
|
1.61160803 1.50361967 1.42467666 2.27024412]
|
|
|
|
mean value: 1.573146152496338
|
|
|
|
key: score_time
|
|
value: [0.0144403 0.01148272 0.01314688 0.01296282 0.01611161 0.01171851
|
|
0.01321745 0.01351833 0.01442194 0.01498127]
|
|
|
|
mean value: 0.01360018253326416
|
|
|
|
key: test_mcc
|
|
value: [0.73684211 0.78947368 0.89597867 0.84327404 0.89597867 0.79056942
|
|
0.82093797 0.87466428 0.8161102 0.89466215]
|
|
|
|
mean value: 0.8358491184507353
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.86842105 0.89473684 0.94736842 0.92105263 0.94736842 0.89473684
|
|
0.90666667 0.93333333 0.90666667 0.94666667]
|
|
|
|
mean value: 0.9167017543859649
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.86842105 0.89473684 0.94871795 0.92307692 0.94871795 0.8974359
|
|
0.91139241 0.93670886 0.90410959 0.94594595]
|
|
|
|
mean value: 0.9179263413495388
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.86842105 0.89473684 0.925 0.9 0.925 0.875
|
|
0.85714286 0.88095238 0.94285714 0.97222222]
|
|
|
|
mean value: 0.9041332497911445
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.86842105 0.89473684 0.97368421 0.94736842 0.97368421 0.92105263
|
|
0.97297297 1. 0.86842105 0.92105263]
|
|
|
|
mean value: 0.9341394025604552
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.86842105 0.89473684 0.94736842 0.92105263 0.94736842 0.89473684
|
|
0.90753912 0.93421053 0.9071835 0.9470128 ]
|
|
|
|
mean value: 0.9169630156472262
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.76744186 0.80952381 0.90243902 0.85714286 0.90243902 0.81395349
|
|
0.8372093 0.88095238 0.825 0.8974359 ]
|
|
|
|
mean value: 0.8493537644998224
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.66
|
|
|
|
Accuracy on Blind test: 0.84
|
|
|
|
Model_name: LDA
|
|
Model func: LinearDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LinearDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.17971611 0.13384604 0.09080482 0.07746363 0.05548596 0.16681075
|
|
0.07742786 0.06104589 0.06684136 0.0902791 ]
|
|
|
|
mean value: 0.09997215270996093
|
|
|
|
key: score_time
|
|
value: [0.05199099 0.01303172 0.02048802 0.01262736 0.01928401 0.0231142
|
|
0.01284122 0.01210785 0.03380132 0.01261544]
|
|
|
|
mean value: 0.02119021415710449
|
|
|
|
key: test_mcc
|
|
value: [0.58630197 0.6599546 0.53300179 0.67716122 0.71077247 0.61057165
|
|
0.4419112 0.71878284 0.60970498 0.73786392]
|
|
|
|
mean value: 0.6286026641946988
|
|
|
|
key: train_mcc
|
|
value: [0.81969175 0.79578531 0.80447713 0.79299297 0.82358955 0.81598097
|
|
0.82342387 0.80783687 0.791651 0.78850003]
|
|
|
|
mean value: 0.8063929436646495
|
|
|
|
key: test_accuracy
|
|
value: [0.78947368 0.82894737 0.76315789 0.82894737 0.85526316 0.80263158
|
|
0.72 0.84 0.8 0.86666667]
|
|
|
|
mean value: 0.8095087719298246
|
|
|
|
key: train_accuracy
|
|
value: [0.90882353 0.89705882 0.90147059 0.89558824 0.91029412 0.90735294
|
|
0.91042584 0.9030837 0.89427313 0.8928047 ]
|
|
|
|
mean value: 0.90211756068066
|
|
|
|
key: test_fscore
|
|
value: [0.80487805 0.82191781 0.7804878 0.84705882 0.85333333 0.81481481
|
|
0.72727273 0.86046512 0.81927711 0.86111111]
|
|
|
|
mean value: 0.8190616696651918
|
|
|
|
key: train_fscore
|
|
value: [0.91193182 0.9002849 0.90442225 0.89900427 0.91396333 0.90987124
|
|
0.91396333 0.90625 0.89859155 0.89703808]
|
|
|
|
mean value: 0.9055320772815084
|
|
|
|
key: test_precision
|
|
value: [0.75 0.85714286 0.72727273 0.76595745 0.86486486 0.76744186
|
|
0.7 0.75510204 0.75555556 0.91176471]
|
|
|
|
mean value: 0.7855102058808311
|
|
|
|
key: train_precision
|
|
value: [0.88186813 0.87292818 0.87811634 0.87052342 0.87804878 0.88579387
|
|
0.88043478 0.87878788 0.86216216 0.86178862]
|
|
|
|
mean value: 0.8750452161930994
|
|
|
|
key: test_recall
|
|
value: [0.86842105 0.78947368 0.84210526 0.94736842 0.84210526 0.86842105
|
|
0.75675676 1. 0.89473684 0.81578947]
|
|
|
|
mean value: 0.8625177809388336
|
|
|
|
key: train_recall
|
|
value: [0.94411765 0.92941176 0.93235294 0.92941176 0.95294118 0.93529412
|
|
0.95014663 0.93548387 0.93823529 0.93529412]
|
|
|
|
mean value: 0.9382689322063136
|
|
|
|
key: test_roc_auc
|
|
value: [0.78947368 0.82894737 0.76315789 0.82894737 0.85526316 0.80263158
|
|
0.72048364 0.84210526 0.79871977 0.8673542 ]
|
|
|
|
mean value: 0.8097083926031294
|
|
|
|
key: train_roc_auc
|
|
value: [0.90882353 0.89705882 0.90147059 0.89558824 0.91029412 0.90735294
|
|
0.91036743 0.90303605 0.89433759 0.892867 ]
|
|
|
|
mean value: 0.9021196308435397
|
|
|
|
key: test_jcc
|
|
value: [0.67346939 0.69767442 0.64 0.73469388 0.74418605 0.6875
|
|
0.57142857 0.75510204 0.69387755 0.75609756]
|
|
|
|
mean value: 0.6954029454663317
|
|
|
|
key: train_jcc
|
|
value: [0.8381201 0.81865285 0.82552083 0.81653747 0.84155844 0.83464567
|
|
0.84155844 0.82857143 0.81585678 0.81329923]
|
|
|
|
mean value: 0.8274321246422995
|
|
|
|
MCC on Blind test: 0.34
|
|
|
|
Accuracy on Blind test: 0.68
|
|
|
|
Model_name: Multinomial
|
|
Model func: MultinomialNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MultinomialNB())])
|
|
|
|
key: fit_time
|
|
value: [0.0137732 0.01285267 0.01282406 0.01277924 0.01282811 0.01281929
|
|
0.01282907 0.01285195 0.01330185 0.0129087 ]
|
|
|
|
mean value: 0.012976813316345214
|
|
|
|
key: score_time
|
|
value: [0.01142526 0.01044106 0.01043034 0.01051903 0.01051569 0.01054072
|
|
0.0104959 0.0108881 0.01071024 0.01049948]
|
|
|
|
mean value: 0.010646581649780273
|
|
|
|
key: test_mcc
|
|
value: [0.56225353 0.3981989 0.42640143 0.5383819 0.42163702 0.66934944
|
|
0.54107468 0.30747339 0.62775817 0.68339862]
|
|
|
|
mean value: 0.5175927085976846
|
|
|
|
key: train_mcc
|
|
value: [0.5383202 0.54702374 0.57686 0.52683016 0.53311399 0.53589009
|
|
0.52930047 0.56053498 0.53784509 0.54194783]
|
|
|
|
mean value: 0.5427666549535174
|
|
|
|
key: test_accuracy
|
|
value: [0.77631579 0.69736842 0.71052632 0.76315789 0.71052632 0.82894737
|
|
0.76 0.65333333 0.81333333 0.84 ]
|
|
|
|
mean value: 0.7553508771929824
|
|
|
|
key: train_accuracy
|
|
value: [0.76764706 0.77205882 0.78529412 0.76176471 0.76470588 0.76617647
|
|
0.76358297 0.77826725 0.76651982 0.76798825]
|
|
|
|
mean value: 0.7694005355446143
|
|
|
|
key: test_fscore
|
|
value: [0.79518072 0.71604938 0.73170732 0.78571429 0.71794872 0.84337349
|
|
0.78571429 0.65789474 0.81081081 0.85 ]
|
|
|
|
mean value: 0.7694393753686896
|
|
|
|
key: train_fscore
|
|
value: [0.77932961 0.78321678 0.8 0.77437326 0.77777778 0.77885953
|
|
0.77419355 0.79114799 0.78068966 0.78356164]
|
|
|
|
mean value: 0.7823149797969657
|
|
|
|
key: test_precision
|
|
value: [0.73333333 0.6744186 0.68181818 0.7173913 0.7 0.77777778
|
|
0.70212766 0.64102564 0.83333333 0.80952381]
|
|
|
|
mean value: 0.7270749645385534
|
|
|
|
key: train_precision
|
|
value: [0.74202128 0.74666667 0.74871795 0.73544974 0.73684211 0.73878628
|
|
0.74193548 0.7486911 0.73506494 0.73333333]
|
|
|
|
mean value: 0.7407508864122306
|
|
|
|
key: test_recall
|
|
value: [0.86842105 0.76315789 0.78947368 0.86842105 0.73684211 0.92105263
|
|
0.89189189 0.67567568 0.78947368 0.89473684]
|
|
|
|
mean value: 0.8199146514935989
|
|
|
|
key: train_recall
|
|
value: [0.82058824 0.82352941 0.85882353 0.81764706 0.82352941 0.82352941
|
|
0.80938416 0.83870968 0.83235294 0.84117647]
|
|
|
|
mean value: 0.8289270312230463
|
|
|
|
key: test_roc_auc
|
|
value: [0.77631579 0.69736842 0.71052632 0.76315789 0.71052632 0.82894737
|
|
0.76173542 0.65362731 0.81365576 0.83926031]
|
|
|
|
mean value: 0.7555120910384068
|
|
|
|
key: train_roc_auc
|
|
value: [0.76764706 0.77205882 0.78529412 0.76176471 0.76470588 0.76617647
|
|
0.76351561 0.77817837 0.76661635 0.76809557]
|
|
|
|
mean value: 0.7694052958426773
|
|
|
|
key: test_jcc
|
|
value: [0.66 0.55769231 0.57692308 0.64705882 0.56 0.72916667
|
|
0.64705882 0.49019608 0.68181818 0.73913043]
|
|
|
|
mean value: 0.6289044393373038
|
|
|
|
key: train_jcc
|
|
value: [0.63844394 0.64367816 0.66666667 0.63181818 0.63636364 0.63781321
|
|
0.63157895 0.65446224 0.64027149 0.64414414]
|
|
|
|
mean value: 0.6425240620828065
|
|
|
|
MCC on Blind test: 0.34
|
|
|
|
Accuracy on Blind test: 0.68
|
|
|
|
Model_name: Passive Aggresive
|
|
Model func: PassiveAggressiveClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
PassiveAggressiveClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.03077173 0.02946591 0.02603412 0.03342438 0.02362275 0.02827597
|
|
0.03586054 0.02817464 0.03121662 0.02424049]
|
|
|
|
mean value: 0.029108715057373048
|
|
|
|
key: score_time
|
|
value: [0.01538658 0.01207542 0.01265955 0.01309633 0.0127635 0.01228857
|
|
0.01225114 0.01875544 0.01243281 0.01222944]
|
|
|
|
mean value: 0.013393878936767578
|
|
|
|
key: test_mcc
|
|
value: [0.66934944 0.5383819 0.41833001 0.55708601 0.6599546 0.63245553
|
|
0.55746481 0.4419112 0.50539409 0.47272456]
|
|
|
|
mean value: 0.545305215603671
|
|
|
|
key: train_mcc
|
|
value: [0.71357341 0.75673436 0.53011809 0.62844724 0.73403438 0.72504884
|
|
0.79783384 0.76807557 0.69335248 0.43911172]
|
|
|
|
mean value: 0.6786329919697462
|
|
|
|
key: test_accuracy
|
|
value: [0.82894737 0.76315789 0.68421053 0.73684211 0.82894737 0.81578947
|
|
0.77333333 0.72 0.73333333 0.68 ]
|
|
|
|
mean value: 0.7564561403508772
|
|
|
|
key: train_accuracy
|
|
value: [0.85 0.87647059 0.73088235 0.78676471 0.86617647 0.86176471
|
|
0.89574156 0.88399413 0.83994126 0.67694567]
|
|
|
|
mean value: 0.8268681437332642
|
|
|
|
key: test_fscore
|
|
value: [0.8115942 0.73529412 0.5862069 0.79166667 0.82191781 0.82051282
|
|
0.79012346 0.72727273 0.67741935 0.53846154]
|
|
|
|
mean value: 0.7300469589859098
|
|
|
|
key: train_fscore
|
|
value: [0.83387622 0.86996904 0.64327485 0.82295482 0.87055477 0.86609687
|
|
0.90206897 0.88330871 0.82218597 0.53974895]
|
|
|
|
mean value: 0.8054039174937626
|
|
|
|
key: test_precision
|
|
value: [0.90322581 0.83333333 0.85 0.65517241 0.85714286 0.8
|
|
0.72727273 0.7 0.875 1. ]
|
|
|
|
mean value: 0.8201147137993634
|
|
|
|
key: train_precision
|
|
value: [0.93430657 0.91830065 0.95375723 0.70354906 0.84297521 0.83977901
|
|
0.8515625 0.88988095 0.92307692 0.93478261]
|
|
|
|
mean value: 0.879197070520412
|
|
|
|
key: test_recall
|
|
value: [0.73684211 0.65789474 0.44736842 1. 0.78947368 0.84210526
|
|
0.86486486 0.75675676 0.55263158 0.36842105]
|
|
|
|
mean value: 0.7016358463726885
|
|
|
|
key: train_recall
|
|
value: [0.75294118 0.82647059 0.48529412 0.99117647 0.9 0.89411765
|
|
0.95894428 0.87683284 0.74117647 0.37941176]
|
|
|
|
mean value: 0.7806365361393824
|
|
|
|
key: test_roc_auc
|
|
value: [0.82894737 0.76315789 0.68421053 0.73684211 0.82894737 0.81578947
|
|
0.7745377 0.72048364 0.73577525 0.68421053]
|
|
|
|
mean value: 0.7572901849217639
|
|
|
|
key: train_roc_auc
|
|
value: [0.85 0.87647059 0.73088235 0.78676471 0.86617647 0.86176471
|
|
0.89564861 0.88400466 0.83979645 0.6765094 ]
|
|
|
|
mean value: 0.8268017940313955
|
|
|
|
key: test_jcc
|
|
value: [0.68292683 0.58139535 0.41463415 0.65517241 0.69767442 0.69565217
|
|
0.65306122 0.57142857 0.51219512 0.36842105]
|
|
|
|
mean value: 0.583256130125893
|
|
|
|
key: train_jcc
|
|
value: [0.7150838 0.76986301 0.47413793 0.69917012 0.77078086 0.7638191
|
|
0.82160804 0.79100529 0.69806094 0.36962751]
|
|
|
|
mean value: 0.6873156600195558
|
|
|
|
MCC on Blind test: 0.45
|
|
|
|
Accuracy on Blind test: 0.71
|
|
|
|
Model_name: Stochastic GDescent
|
|
Model func: SGDClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SGDClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.03787994 0.02455521 0.02442789 0.02470016 0.02165627 0.02965689
|
|
0.02790475 0.02669692 0.02766299 0.02927542]
|
|
|
|
mean value: 0.027441644668579103
|
|
|
|
key: score_time
|
|
value: [0.01379728 0.0121367 0.01298499 0.02103901 0.01326203 0.01263905
|
|
0.01222754 0.01251531 0.01273179 0.01237869]
|
|
|
|
mean value: 0.01357123851776123
|
|
|
|
key: test_mcc
|
|
value: [0.63510735 0.41048084 0.44136741 0.55708601 0.45184806 0.63960215
|
|
0.43065492 0.59621136 0.58086018 0.63198054]
|
|
|
|
mean value: 0.5375198815486677
|
|
|
|
key: train_mcc
|
|
value: [0.79332396 0.55185884 0.57210295 0.57511255 0.4846768 0.79106684
|
|
0.50910425 0.77303788 0.67887742 0.61005074]
|
|
|
|
mean value: 0.6339212235279867
|
|
|
|
key: test_accuracy
|
|
value: [0.81578947 0.67105263 0.69736842 0.73684211 0.68421053 0.81578947
|
|
0.65333333 0.78666667 0.77333333 0.78666667]
|
|
|
|
mean value: 0.7421052631578947
|
|
|
|
key: train_accuracy
|
|
value: [0.89558824 0.74558824 0.76029412 0.75 0.7 0.89264706
|
|
0.70778267 0.88399413 0.8164464 0.77239354]
|
|
|
|
mean value: 0.7924734387146929
|
|
|
|
key: test_fscore
|
|
value: [0.825 0.54545455 0.61016949 0.79166667 0.55555556 0.82926829
|
|
0.74 0.80952381 0.80898876 0.82608696]
|
|
|
|
mean value: 0.7341714081975611
|
|
|
|
key: train_fscore
|
|
value: [0.89929078 0.67047619 0.69758813 0.7995283 0.58196721 0.89875173
|
|
0.77360637 0.89042996 0.84433375 0.81392557]
|
|
|
|
mean value: 0.7869897993420873
|
|
|
|
key: test_precision
|
|
value: [0.78571429 0.88235294 0.85714286 0.65517241 0.9375 0.77272727
|
|
0.58730159 0.72340426 0.70588235 0.7037037 ]
|
|
|
|
mean value: 0.7610901669819606
|
|
|
|
key: train_precision
|
|
value: [0.86849315 0.95135135 0.94472362 0.66732283 0.95945946 0.8503937
|
|
0.63197026 0.84473684 0.73218143 0.68762677]
|
|
|
|
mean value: 0.8138259417681408
|
|
|
|
key: test_recall
|
|
value: [0.86842105 0.39473684 0.47368421 1. 0.39473684 0.89473684
|
|
1. 0.91891892 0.94736842 1. ]
|
|
|
|
mean value: 0.7892603129445235
|
|
|
|
key: train_recall
|
|
value: [0.93235294 0.51764706 0.55294118 0.99705882 0.41764706 0.95294118
|
|
0.99706745 0.94134897 0.99705882 0.99705882]
|
|
|
|
mean value: 0.8303122304640331
|
|
|
|
key: test_roc_auc
|
|
value: [0.81578947 0.67105263 0.69736842 0.73684211 0.68421053 0.81578947
|
|
0.65789474 0.78840683 0.77098151 0.78378378]
|
|
|
|
mean value: 0.7422119487908961
|
|
|
|
key: train_roc_auc
|
|
value: [0.89558824 0.74558824 0.76029412 0.75 0.7 0.89264706
|
|
0.70735725 0.88390978 0.81671123 0.77272296]
|
|
|
|
mean value: 0.7924818871830257
|
|
|
|
key: test_jcc
|
|
value: [0.70212766 0.375 0.43902439 0.65517241 0.38461538 0.70833333
|
|
0.58730159 0.68 0.67924528 0.7037037 ]
|
|
|
|
mean value: 0.5914523755584351
|
|
|
|
key: train_jcc
|
|
value: [0.81701031 0.50429799 0.53561254 0.66601179 0.41040462 0.81612091
|
|
0.63079777 0.8025 0.73060345 0.68623482]
|
|
|
|
mean value: 0.6599594197802489
|
|
|
|
MCC on Blind test: 0.4
|
|
|
|
Accuracy on Blind test: 0.64
|
|
|
|
Model_name: AdaBoost Classifier
|
|
Model func: AdaBoostClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', AdaBoostClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.26274276 0.25890565 0.25864434 0.25764346 0.25323176 0.25744224
|
|
0.25645471 0.26036859 0.26048374 0.26044703]
|
|
|
|
mean value: 0.2586364269256592
|
|
|
|
key: score_time
|
|
value: [0.01856923 0.01842999 0.01838875 0.01839066 0.01797223 0.0185132
|
|
0.01836491 0.01880026 0.01848912 0.01853037]
|
|
|
|
mean value: 0.01844487190246582
|
|
|
|
key: test_mcc
|
|
value: [0.76342228 0.79056942 0.69989647 0.87603759 0.76342228 0.76554733
|
|
0.7626532 0.82093797 0.8161102 0.89466215]
|
|
|
|
mean value: 0.7953258903655164
|
|
|
|
key: train_mcc
|
|
value: [0.93823935 0.92657079 0.94421975 0.93856417 0.93543979 0.93856417
|
|
0.93277543 0.94452745 0.92690507 0.90927841]
|
|
|
|
mean value: 0.9335084377638028
|
|
|
|
key: test_accuracy
|
|
value: [0.88157895 0.89473684 0.84210526 0.93421053 0.88157895 0.88157895
|
|
0.88 0.90666667 0.90666667 0.94666667]
|
|
|
|
mean value: 0.895578947368421
|
|
|
|
key: train_accuracy
|
|
value: [0.96911765 0.96323529 0.97205882 0.96911765 0.96764706 0.96911765
|
|
0.96622614 0.97209985 0.96328928 0.95447871]
|
|
|
|
mean value: 0.9666388097089056
|
|
|
|
key: test_fscore
|
|
value: [0.88 0.89189189 0.85714286 0.9382716 0.88 0.88607595
|
|
0.88311688 0.91139241 0.90410959 0.94594595]
|
|
|
|
mean value: 0.8977947126507325
|
|
|
|
key: train_fscore
|
|
value: [0.969163 0.96350365 0.97226277 0.96952104 0.96793003 0.96952104
|
|
0.96671491 0.97250362 0.96371553 0.95500726]
|
|
|
|
mean value: 0.9669842848618135
|
|
|
|
key: test_precision
|
|
value: [0.89189189 0.91666667 0.7826087 0.88372093 0.89189189 0.85365854
|
|
0.85 0.85714286 0.94285714 0.97222222]
|
|
|
|
mean value: 0.884266083514277
|
|
|
|
key: train_precision
|
|
value: [0.96774194 0.95652174 0.96521739 0.95702006 0.95953757 0.95702006
|
|
0.95428571 0.96 0.9512894 0.94269341]
|
|
|
|
mean value: 0.9571327275094806
|
|
|
|
key: test_recall
|
|
value: [0.86842105 0.86842105 0.94736842 1. 0.86842105 0.92105263
|
|
0.91891892 0.97297297 0.86842105 0.92105263]
|
|
|
|
mean value: 0.9155049786628734
|
|
|
|
key: train_recall
|
|
value: [0.97058824 0.97058824 0.97941176 0.98235294 0.97647059 0.98235294
|
|
0.97947214 0.98533724 0.97647059 0.96764706]
|
|
|
|
mean value: 0.9770691737105399
|
|
|
|
key: test_roc_auc
|
|
value: [0.88157895 0.89473684 0.84210526 0.93421053 0.88157895 0.88157895
|
|
0.88051209 0.90753912 0.9071835 0.9470128 ]
|
|
|
|
mean value: 0.8958036984352774
|
|
|
|
key: train_roc_auc
|
|
value: [0.96911765 0.96323529 0.97205882 0.96911765 0.96764706 0.96911765
|
|
0.96620666 0.97208039 0.96330861 0.95449802]
|
|
|
|
mean value: 0.9666387786786269
|
|
|
|
key: test_jcc
|
|
value: [0.78571429 0.80487805 0.75 0.88372093 0.78571429 0.79545455
|
|
0.79069767 0.8372093 0.825 0.8974359 ]
|
|
|
|
mean value: 0.8155824970076246
|
|
|
|
key: train_jcc
|
|
value: [0.94017094 0.92957746 0.94602273 0.94084507 0.93785311 0.94084507
|
|
0.93557423 0.94647887 0.92997199 0.91388889]
|
|
|
|
mean value: 0.9361228361037823
|
|
|
|
MCC on Blind test: 0.59
|
|
|
|
Accuracy on Blind test: 0.8
|
|
|
|
Model_name: Bagging Classifier
|
|
Model func: BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
BaggingClassifier(n_jobs=10, oob_score=True,
|
|
random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.07968974 0.08710742 0.09043193 0.08065605 0.0839889 0.09154201
|
|
0.10539412 0.09674501 0.0930841 0.08596063]
|
|
|
|
mean value: 0.08945999145507813
|
|
|
|
key: score_time
|
|
value: [0.01942086 0.02579451 0.02129459 0.02325535 0.02987933 0.03148293
|
|
0.0253067 0.02420497 0.02332687 0.02277589]
|
|
|
|
mean value: 0.024674201011657716
|
|
|
|
key: test_mcc
|
|
value: [0.76342228 0.71077247 0.97402153 0.79056942 0.89597867 0.71275096
|
|
0.76721166 0.85123569 0.8161102 0.81352334]
|
|
|
|
mean value: 0.8095596234043082
|
|
|
|
key: train_mcc
|
|
value: [0.979416 0.98250594 0.97951769 0.99118076 0.97648748 0.97944989
|
|
0.98535412 0.98239592 0.99706742 0.98531987]
|
|
|
|
mean value: 0.9838695079822907
|
|
|
|
key: test_accuracy
|
|
value: [0.88157895 0.85526316 0.98684211 0.89473684 0.94736842 0.85526316
|
|
0.88 0.92 0.90666667 0.90666667]
|
|
|
|
mean value: 0.903438596491228
|
|
|
|
key: train_accuracy
|
|
value: [0.98970588 0.99117647 0.98970588 0.99558824 0.98823529 0.98970588
|
|
0.99265786 0.99118943 0.99853157 0.99265786]
|
|
|
|
mean value: 0.9919154357778354
|
|
|
|
key: test_fscore
|
|
value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
[0.88311688 0.85333333 0.98701299 0.8974359 0.94594595 0.86075949
|
|
0.88607595 0.925 0.90410959 0.90909091]
|
|
|
|
mean value: 0.9051880988015026
|
|
|
|
key: train_fscore
|
|
value: [0.989721 0.99109792 0.98978102 0.99558174 0.98820059 0.98966027
|
|
0.99263623 0.99117647 0.99852725 0.99263623]
|
|
|
|
mean value: 0.9919018713014629
|
|
|
|
key: test_precision
|
|
value: [0.87179487 0.86486486 0.97435897 0.875 0.97222222 0.82926829
|
|
0.83333333 0.86046512 0.94285714 0.8974359 ]
|
|
|
|
mean value: 0.8921600715829303
|
|
|
|
key: train_precision
|
|
value: [0.98826979 1. 0.9826087 0.99705015 0.99112426 0.99406528
|
|
0.99704142 0.99410029 1. 0.99410029]
|
|
|
|
mean value: 0.9938360190209191
|
|
|
|
key: test_recall
|
|
value: [0.89473684 0.84210526 1. 0.92105263 0.92105263 0.89473684
|
|
0.94594595 1. 0.86842105 0.92105263]
|
|
|
|
mean value: 0.9209103840682789
|
|
|
|
key: train_recall
|
|
value: [0.99117647 0.98235294 0.99705882 0.99411765 0.98529412 0.98529412
|
|
0.98826979 0.98826979 0.99705882 0.99117647]
|
|
|
|
mean value: 0.9900069001207521
|
|
|
|
key: test_roc_auc
|
|
value: [0.88157895 0.85526316 0.98684211 0.89473684 0.94736842 0.85526316
|
|
0.88086771 0.92105263 0.9071835 0.90647226]
|
|
|
|
mean value: 0.9036628733997155
|
|
|
|
key: train_roc_auc
|
|
value: [0.98970588 0.99117647 0.98970588 0.99558824 0.98823529 0.98970588
|
|
0.99266431 0.99119372 0.99852941 0.99265568]
|
|
|
|
mean value: 0.9919160772813524
|
|
|
|
key: test_jcc
|
|
value: [0.79069767 0.74418605 0.97435897 0.81395349 0.8974359 0.75555556
|
|
0.79545455 0.86046512 0.825 0.83333333]
|
|
|
|
mean value: 0.8290440631719701
|
|
|
|
key: train_jcc
|
|
value: [0.97965116 0.98235294 0.97976879 0.99120235 0.97667638 0.97953216
|
|
0.98538012 0.98250729 0.99705882 0.98538012]
|
|
|
|
mean value: 0.983951013079501
|
|
|
|
MCC on Blind test: 0.61
|
|
|
|
Accuracy on Blind test: 0.81
|
|
|
|
Model_name: Gaussian Process
|
|
Model func: GaussianProcessClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianProcessClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.58085155 0.4722631 0.66565657 0.43656301 0.73668122 0.53079462
|
|
0.50757051 0.43947673 0.52401352 0.40544534]
|
|
|
|
mean value: 0.529931616783142
|
|
|
|
key: score_time
|
|
value: [0.0383954 0.04386234 0.02763677 0.03717446 0.05298567 0.03316832
|
|
0.04133582 0.04108596 0.0173068 0.05029941]
|
|
|
|
mean value: 0.03832509517669678
|
|
|
|
key: test_mcc
|
|
value: [0.71077247 0.47970161 0.56225353 0.64605828 0.58218174 0.55282303
|
|
0.63072008 0.44914911 0.57437737 0.65338095]
|
|
|
|
mean value: 0.5841418166265647
|
|
|
|
key: train_mcc
|
|
value: [0.93284128 0.93608802 0.94444859 0.93020068 0.93856417 0.9391497
|
|
0.92159435 0.94739617 0.93030612 0.93294225]
|
|
|
|
mean value: 0.9353531321800787
|
|
|
|
key: test_accuracy
|
|
value: [0.85526316 0.73684211 0.77631579 0.81578947 0.78947368 0.77631579
|
|
0.81333333 0.72 0.78666667 0.82666667]
|
|
|
|
mean value: 0.7896666666666666
|
|
|
|
key: train_accuracy
|
|
value: [0.96617647 0.96764706 0.97205882 0.96470588 0.96911765 0.96911765
|
|
0.96035242 0.97356828 0.96475771 0.96622614]
|
|
|
|
mean value: 0.9673728081540987
|
|
|
|
key: test_fscore
|
|
value: [0.85333333 0.75609756 0.79518072 0.83333333 0.8 0.77333333
|
|
0.82051282 0.74074074 0.78378378 0.83116883]
|
|
|
|
mean value: 0.7987484460073353
|
|
|
|
key: train_fscore
|
|
value: [0.96671491 0.96829971 0.9724238 0.96541787 0.96952104 0.96978417
|
|
0.96126255 0.97391304 0.96541787 0.96671491]
|
|
|
|
mean value: 0.9679469876100104
|
|
|
|
key: test_precision
|
|
value: [0.86486486 0.70454545 0.73333333 0.76086957 0.76190476 0.78378378
|
|
0.7804878 0.68181818 0.80555556 0.82051282]
|
|
|
|
mean value: 0.7697676126414197
|
|
|
|
key: train_precision
|
|
value: [0.95156695 0.94915254 0.95988539 0.94632768 0.95702006 0.94929577
|
|
0.94101124 0.96275072 0.94632768 0.95156695]
|
|
|
|
mean value: 0.9514904983799819
|
|
|
|
key: test_recall
|
|
value: [0.84210526 0.81578947 0.86842105 0.92105263 0.84210526 0.76315789
|
|
0.86486486 0.81081081 0.76315789 0.84210526]
|
|
|
|
mean value: 0.8333570412517781
|
|
|
|
key: train_recall
|
|
value: [0.98235294 0.98823529 0.98529412 0.98529412 0.98235294 0.99117647
|
|
0.98240469 0.98533724 0.98529412 0.98235294]
|
|
|
|
mean value: 0.9850094876660341
|
|
|
|
key: test_roc_auc
|
|
value: [0.85526316 0.73684211 0.77631579 0.81578947 0.78947368 0.77631579
|
|
0.81401138 0.72119488 0.78698435 0.82645804]
|
|
|
|
mean value: 0.7898648648648648
|
|
|
|
key: train_roc_auc
|
|
value: [0.96617647 0.96764706 0.97205882 0.96470588 0.96911765 0.96911765
|
|
0.96031999 0.97355097 0.96478782 0.96624978]
|
|
|
|
mean value: 0.96737321028118
|
|
|
|
key: test_jcc
|
|
value: [0.74418605 0.60784314 0.66 0.71428571 0.66666667 0.63043478
|
|
0.69565217 0.58823529 0.64444444 0.71111111]
|
|
|
|
mean value: 0.6662859370913853
|
|
|
|
key: train_jcc
|
|
value: [0.93557423 0.93854749 0.94632768 0.93314763 0.94084507 0.94134078
|
|
0.92541436 0.94915254 0.93314763 0.93557423]
|
|
|
|
mean value: 0.9379071653216253
|
|
|
|
MCC on Blind test: 0.28
|
|
|
|
Accuracy on Blind test: 0.65
|
|
|
|
Model_name: Gradient Boosting
|
|
Model func: GradientBoostingClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GradientBoostingClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.0936563 1.09588432 1.08864546 1.09667611 1.08197212 1.68473005
|
|
1.21586967 1.02494574 0.94529748 1.00627208]
|
|
|
|
mean value: 1.1333949327468873
|
|
|
|
key: score_time
|
|
value: [0.01061368 0.01106811 0.01119781 0.0110724 0.01127148 0.02288246
|
|
0.01375771 0.00960803 0.00992942 0.00988293]
|
|
|
|
mean value: 0.012128400802612304
|
|
|
|
key: test_mcc
|
|
value: [0.78947368 0.86872191 0.89473684 0.8183437 0.92137172 0.79388419
|
|
0.79731451 0.87466428 0.78790056 0.89466215]
|
|
|
|
mean value: 0.8441073533189662
|
|
|
|
key: train_mcc
|
|
value: [0.97944989 0.98825239 0.98236994 0.98529838 0.96768473 0.98236994
|
|
0.97652173 0.97650509 0.97944627 0.97944609]
|
|
|
|
mean value: 0.9797344442010547
|
|
|
|
key: test_accuracy
|
|
value: [0.89473684 0.93421053 0.94736842 0.90789474 0.96052632 0.89473684
|
|
0.89333333 0.93333333 0.89333333 0.94666667]
|
|
|
|
mean value: 0.9206140350877193
|
|
|
|
key: train_accuracy
|
|
value: [0.98970588 0.99411765 0.99117647 0.99264706 0.98382353 0.99117647
|
|
0.98825257 0.98825257 0.989721 0.989721 ]
|
|
|
|
mean value: 0.9898594195387407
|
|
|
|
key: test_fscore
|
|
value: [0.89473684 0.93506494 0.94736842 0.91139241 0.96103896 0.9
|
|
0.9 0.93670886 0.89189189 0.94594595]
|
|
|
|
mean value: 0.9224148262922414
|
|
|
|
key: train_fscore
|
|
value: [0.9897511 0.99410029 0.99120235 0.99265786 0.98375185 0.99120235
|
|
0.98830409 0.98826979 0.989721 0.98969072]
|
|
|
|
mean value: 0.9898651396108782
|
|
|
|
key: test_precision
|
|
value: [0.89473684 0.92307692 0.94736842 0.87804878 0.94871795 0.85714286
|
|
0.8372093 0.88095238 0.91666667 0.97222222]
|
|
|
|
mean value: 0.905614234475028
|
|
|
|
key: train_precision
|
|
value: [0.98542274 0.99704142 0.98830409 0.99120235 0.98813056 0.98830409
|
|
0.98542274 0.98826979 0.98826979 0.99115044]
|
|
|
|
mean value: 0.9891518030062375
|
|
|
|
key: test_recall
|
|
value: [0.89473684 0.94736842 0.94736842 0.94736842 0.97368421 0.94736842
|
|
0.97297297 1. 0.86842105 0.92105263]
|
|
|
|
mean value: 0.9420341394025604
|
|
|
|
key: train_recall
|
|
value: [0.99411765 0.99117647 0.99411765 0.99411765 0.97941176 0.99411765
|
|
0.99120235 0.98826979 0.99117647 0.98823529]
|
|
|
|
mean value: 0.9905942728997758
|
|
|
|
key: test_roc_auc
|
|
value: [0.89473684 0.93421053 0.94736842 0.90789474 0.96052632 0.89473684
|
|
0.89438122 0.93421053 0.89366999 0.9470128 ]
|
|
|
|
mean value: 0.9208748221906117
|
|
|
|
key: train_roc_auc
|
|
value: [0.98970588 0.99411765 0.99117647 0.99264706 0.98382353 0.99117647
|
|
0.98824823 0.98825254 0.98972313 0.98971882]
|
|
|
|
mean value: 0.9898589787821287
|
|
|
|
key: test_jcc
|
|
value: [0.80952381 0.87804878 0.9 0.8372093 0.925 0.81818182
|
|
0.81818182 0.88095238 0.80487805 0.8974359 ]
|
|
|
|
mean value: 0.8569411855869599
|
|
|
|
key: train_jcc
|
|
value: [0.97971014 0.98826979 0.98255814 0.98542274 0.96802326 0.98255814
|
|
0.97687861 0.97681159 0.97965116 0.97959184]
|
|
|
|
mean value: 0.9799475421502499
|
|
|
|
MCC on Blind test: 0.68
|
|
|
|
Accuracy on Blind test: 0.84
|
|
|
|
Model_name: QDA
|
|
Model func: QuadraticDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', QuadraticDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.1283884 0.19084382 0.15524626 0.11127925 0.10671949 0.13878107
|
|
0.18179059 0.13466334 0.12567091 0.05272937]
|
|
|
|
mean value: 0.13261125087738038
|
|
|
|
key: score_time
|
|
value: [0.0198245 0.02475691 0.01662803 0.03627682 0.02782297 0.04103661
|
|
0.03285313 0.02479196 0.01356196 0.03608537]
|
|
|
|
mean value: 0.02736382484436035
|
|
|
|
key: test_mcc
|
|
value: [0.31448545 0.08574929 0.04072315 0.29277002 0.31448545 0.17149859
|
|
0.22491669 0.25457586 0.21009482 0.1639348 ]
|
|
|
|
mean value: 0.20732341220799325
|
|
|
|
key: train_mcc
|
|
value: [0.30536468 0.30536468 0.33333333 0.30536468 0.30823376 0.33060328
|
|
0.3026794 0.31975237 0.30753396 0.32159529]
|
|
|
|
mean value: 0.3139825436303487
|
|
|
|
key: test_accuracy
|
|
value: [0.61842105 0.52631579 0.51315789 0.57894737 0.61842105 0.55263158
|
|
0.56 0.57333333 0.57333333 0.54666667]
|
|
|
|
mean value: 0.5661228070175439
|
|
|
|
key: train_accuracy
|
|
value: [0.58529412 0.58529412 0.6 0.58529412 0.58676471 0.59852941
|
|
0.58443465 0.59324523 0.58590308 0.59324523]
|
|
|
|
mean value: 0.5898004664420834
|
|
|
|
key: test_fscore
|
|
value: [0.71287129 0.66037736 0.64761905 0.7037037 0.71287129 0.67924528
|
|
0.68571429 0.69230769 0.69230769 0.68518519]
|
|
|
|
mean value: 0.6872202822604466
|
|
|
|
key: train_fscore
|
|
value: [0.70686071 0.70686071 0.71428571 0.70686071 0.70759625 0.7135362
|
|
0.70673575 0.71115746 0.70686071 0.71055381]
|
|
|
|
mean value: 0.7091308018080202
|
|
|
|
key: test_precision
|
|
value: [0.57142857 0.51470588 0.50746269 0.54285714 0.57142857 0.52941176
|
|
0.52941176 0.53731343 0.54545455 0.52857143]
|
|
|
|
mean value: 0.537804579090795
|
|
|
|
key: train_precision
|
|
value: [0.54662379 0.54662379 0.55555556 0.54662379 0.54750403 0.55464927
|
|
0.54647436 0.55177994 0.54662379 0.55105348]
|
|
|
|
mean value: 0.5493511802927066
|
|
|
|
key: test_recall
|
|
value: [0.94736842 0.92105263 0.89473684 1. 0.94736842 0.94736842
|
|
0.97297297 0.97297297 0.94736842 0.97368421]
|
|
|
|
mean value: 0.9524893314366999
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.61842105 0.52631579 0.51315789 0.57894737 0.61842105 0.55263158
|
|
0.56543385 0.57859175 0.56827881 0.54089616]
|
|
|
|
mean value: 0.5661095305832148
|
|
|
|
key: train_roc_auc
|
|
value: [0.58529412 0.58529412 0.6 0.58529412 0.58676471 0.59852941
|
|
0.58382353 0.59264706 0.58651026 0.59384164]
|
|
|
|
mean value: 0.5897998964981886
|
|
|
|
key: test_jcc
|
|
value: [0.55384615 0.49295775 0.47887324 0.54285714 0.55384615 0.51428571
|
|
0.52173913 0.52941176 0.52941176 0.52112676]
|
|
|
|
mean value: 0.5238355571160586
|
|
|
|
key: train_jcc
|
|
value: [0.54662379 0.54662379 0.55555556 0.54662379 0.54750403 0.55464927
|
|
0.54647436 0.55177994 0.54662379 0.55105348]
|
|
|
|
mean value: 0.5493511802927066
|
|
|
|
MCC on Blind test: 0.06
|
|
|
|
Accuracy on Blind test: 0.47
|
|
|
|
Model_name: Ridge Classifier
|
|
Model func: RidgeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.04559278 0.03283882 0.04472232 0.03194022 0.01865029 0.02330136
|
|
0.02904868 0.04296136 0.03664732 0.03862453]
|
|
|
|
mean value: 0.03443276882171631
|
|
|
|
key: score_time
|
|
value: [0.03223968 0.02177191 0.04498196 0.02915931 0.01795554 0.01281667
|
|
0.01943016 0.02032876 0.02191067 0.02729392]
|
|
|
|
mean value: 0.024788856506347656
|
|
|
|
key: test_mcc
|
|
value: [0.6599546 0.65812266 0.56881543 0.71675803 0.73786479 0.61057165
|
|
0.55746481 0.68450529 0.6806858 0.8161102 ]
|
|
|
|
mean value: 0.6690853260107446
|
|
|
|
key: train_mcc
|
|
value: [0.78188102 0.77285127 0.77007905 0.76455087 0.77321467 0.77598004
|
|
0.78612232 0.77282342 0.77985922 0.75985095]
|
|
|
|
mean value: 0.7737212825417513
|
|
|
|
key: test_accuracy
|
|
value: [0.82894737 0.82894737 0.77631579 0.85526316 0.86842105 0.80263158
|
|
0.77333333 0.82666667 0.84 0.90666667]
|
|
|
|
mean value: 0.8307192982456141
|
|
|
|
key: train_accuracy
|
|
value: [0.88970588 0.88529412 0.88382353 0.88088235 0.88529412 0.88676471
|
|
0.89133627 0.88546256 0.88839941 0.87812041]
|
|
|
|
mean value: 0.8855083354927874
|
|
|
|
key: test_fscore
|
|
value: [0.83544304 0.82666667 0.8 0.86419753 0.86486486 0.81481481
|
|
0.79012346 0.84705882 0.84615385 0.90410959]
|
|
|
|
mean value: 0.8393432630699704
|
|
|
|
key: train_fscore
|
|
value: /home/tanu/git/LSHTM_analysis/scripts/ml/./katg_cd_8020.py:156: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
ros_CT.sort_values(by = ['test_mcc'], ascending = False, inplace = True)
|
|
/home/tanu/git/LSHTM_analysis/scripts/ml/./katg_cd_8020.py:159: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
ros_BT.sort_values(by = ['bts_mcc'], ascending = False, inplace = True)
|
|
[0.89391796 0.88951841 0.88826025 0.88575458 0.88983051 0.89108911
|
|
0.89635854 0.88951841 0.89295775 0.88359046]
|
|
|
|
mean value: 0.8900795999053073
|
|
|
|
key: test_precision
|
|
value: [0.80487805 0.83783784 0.72340426 0.81395349 0.88888889 0.76744186
|
|
0.72727273 0.75 0.825 0.94285714]
|
|
|
|
mean value: 0.8081534249793443
|
|
|
|
key: train_precision
|
|
value: [0.86103542 0.8579235 0.85558583 0.85094851 0.85597826 0.85831063
|
|
0.85790885 0.86027397 0.85675676 0.84450402]
|
|
|
|
mean value: 0.8559225745723615
|
|
|
|
key: test_recall
|
|
value: [0.86842105 0.81578947 0.89473684 0.92105263 0.84210526 0.86842105
|
|
0.86486486 0.97297297 0.86842105 0.86842105]
|
|
|
|
mean value: 0.878520625889047
|
|
|
|
key: train_recall
|
|
value: [0.92941176 0.92352941 0.92352941 0.92352941 0.92647059 0.92647059
|
|
0.93841642 0.92082111 0.93235294 0.92647059]
|
|
|
|
mean value: 0.9271002242539245
|
|
|
|
key: test_roc_auc
|
|
value: [0.82894737 0.82894737 0.77631579 0.85526316 0.86842105 0.80263158
|
|
0.7745377 0.82859175 0.83961593 0.9071835 ]
|
|
|
|
mean value: 0.831045519203414
|
|
|
|
key: train_roc_auc
|
|
value: [0.88970588 0.88529412 0.88382353 0.88088235 0.88529412 0.88676471
|
|
0.89126703 0.88541056 0.88846386 0.87819131]
|
|
|
|
mean value: 0.8855097464205623
|
|
|
|
key: test_jcc
|
|
value: [0.7173913 0.70454545 0.66666667 0.76086957 0.76190476 0.6875
|
|
0.65306122 0.73469388 0.73333333 0.825 ]
|
|
|
|
mean value: 0.724496618805625
|
|
|
|
key: train_jcc
|
|
value: [0.80818414 0.80102041 0.79898219 0.79493671 0.80152672 0.80357143
|
|
0.81218274 0.80102041 0.80661578 0.79145729]
|
|
|
|
mean value: 0.801949780646398
|
|
|
|
MCC on Blind test: 0.4
|
|
|
|
Accuracy on Blind test: 0.71
|
|
|
|
Model_name: Ridge ClassifierCV
|
|
Model func: RidgeClassifierCV(cv=10)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifierCV(cv=10))])
|
|
|
|
key: fit_time
|
|
value: [0.37840986 0.41018391 0.42391634 0.49048257 0.44300199 0.40282965
|
|
0.46529341 0.4824481 0.44228101 0.42206383]
|
|
|
|
mean value: 0.4360910654067993
|
|
|
|
key: score_time
|
|
value: [0.02124166 0.02055788 0.01945543 0.02575946 0.02589774 0.02572131
|
|
0.02259636 0.02559733 0.02544737 0.02028871]
|
|
|
|
mean value: 0.023256325721740724
|
|
|
|
key: test_mcc
|
|
value: [0.63510735 0.65812266 0.56881543 0.69989647 0.76342228 0.61057165
|
|
0.50330696 0.68450529 0.6806858 0.78790056]
|
|
|
|
mean value: 0.6592334452240006
|
|
|
|
key: train_mcc
|
|
value: [0.81068054 0.77285127 0.77007905 0.79922228 0.81728696 0.77598004
|
|
0.82342387 0.77282342 0.77985922 0.79920148]
|
|
|
|
mean value: 0.7921408114809676
|
|
|
|
key: test_accuracy
|
|
value: [0.81578947 0.82894737 0.77631579 0.84210526 0.88157895 0.80263158
|
|
0.74666667 0.82666667 0.84 0.89333333]
|
|
|
|
mean value: 0.8254035087719298
|
|
|
|
key: train_accuracy
|
|
value: [0.90441176 0.88529412 0.88382353 0.89852941 0.90735294 0.88676471
|
|
0.91042584 0.88546256 0.88839941 0.89867841]
|
|
|
|
mean value: 0.8949142696726268
|
|
|
|
key: test_fscore
|
|
value: [0.825 0.82666667 0.8 0.85714286 0.88 0.81481481
|
|
0.7654321 0.84705882 0.84615385 0.89189189]
|
|
|
|
mean value: 0.835416099896492
|
|
|
|
key: train_fscore
|
|
value: [0.90753912 0.88951841 0.88826025 0.90212766 0.91089109 0.89108911
|
|
0.91396333 0.88951841 0.89295775 0.90184922]
|
|
|
|
mean value: 0.89877143502015
|
|
|
|
key: test_precision
|
|
value: [0.78571429 0.83783784 0.72340426 0.7826087 0.89189189 0.76744186
|
|
0.70454545 0.75 0.825 0.91666667]
|
|
|
|
mean value: 0.7985110948092575
|
|
|
|
key: train_precision
|
|
value: [0.87878788 0.8579235 0.85558583 0.87123288 0.8773842 0.85831063
|
|
0.88043478 0.86027397 0.85675676 0.87327824]
|
|
|
|
mean value: 0.8669968655601713
|
|
|
|
key: test_recall
|
|
value: [0.86842105 0.81578947 0.89473684 0.94736842 0.86842105 0.86842105
|
|
0.83783784 0.97297297 0.86842105 0.86842105]
|
|
|
|
mean value: 0.8810810810810811
|
|
|
|
key: train_recall
|
|
value: [0.93823529 0.92352941 0.92352941 0.93529412 0.94705882 0.92647059
|
|
0.95014663 0.92082111 0.93235294 0.93235294]
|
|
|
|
mean value: 0.9329791271347249
|
|
|
|
key: test_roc_auc
|
|
value: [0.81578947 0.82894737 0.77631579 0.84210526 0.88157895 0.80263158
|
|
0.74786629 0.82859175 0.83961593 0.89366999]
|
|
|
|
mean value: 0.8257112375533429
|
|
|
|
key: train_roc_auc
|
|
value: [0.90441176 0.88529412 0.88382353 0.89852941 0.90735294 0.88676471
|
|
0.91036743 0.88541056 0.88846386 0.89872779]
|
|
|
|
mean value: 0.8949146110056926
|
|
|
|
key: test_jcc
|
|
value: [0.70212766 0.70454545 0.66666667 0.75 0.78571429 0.6875
|
|
0.62 0.73469388 0.73333333 0.80487805]
|
|
|
|
mean value: 0.7189459326165717
|
|
|
|
key: train_jcc
|
|
value: [0.83072917 0.80102041 0.79898219 0.82170543 0.83636364 0.80357143
|
|
0.84155844 0.80102041 0.80661578 0.82124352]
|
|
|
|
mean value: 0.8162810403535945
|
|
|
|
MCC on Blind test: 0.36
|
|
|
|
Accuracy on Blind test: 0.69
|
|
|
|
Model_name: Logistic Regression
|
|
Model func: LogisticRegression(random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegression(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.0425415 0.03874874 0.03927588 0.0383656 0.03998423 0.03813457
|
|
0.03758001 0.03949738 0.03931594 0.04858851]
|
|
|
|
mean value: 0.04020323753356934
|
|
|
|
key: score_time
|
|
value: [0.015064 0.01481438 0.01471043 0.01503921 0.01485944 0.01232386
|
|
0.01542783 0.01530528 0.01557565 0.02455449]
|
|
|
|
mean value: 0.015767455101013184
|
|
|
|
key: test_mcc
|
|
value: [0.41798942 0.49734925 0.60000053 0.60000053 0.46384865 0.63745526
|
|
0.78353876 0.52777778 0.6005291 0.67284827]
|
|
|
|
mean value: 0.5801337529991459
|
|
|
|
key: train_mcc
|
|
value: [0.75817419 0.74334416 0.75401501 0.73098113 0.74616811 0.77856602
|
|
0.72613214 0.75072428 0.74651079 0.75012681]
|
|
|
|
mean value: 0.7484742649998071
|
|
|
|
key: test_accuracy
|
|
value: [0.70909091 0.74545455 0.8 0.8 0.72727273 0.81818182
|
|
0.89090909 0.76363636 0.8 0.83636364]
|
|
|
|
mean value: 0.7890909090909091
|
|
|
|
key: train_accuracy
|
|
value: [0.87878788 0.87070707 0.87676768 0.86464646 0.87272727 0.88888889
|
|
0.86262626 0.87474747 0.87272727 0.87474747]
|
|
|
|
mean value: 0.8737373737373737
|
|
|
|
key: test_fscore
|
|
value: [0.7037037 0.75862069 0.79245283 0.79245283 0.74576271 0.82758621
|
|
0.89655172 0.76363636 0.8 0.84210526]
|
|
|
|
mean value: 0.7922872323429382
|
|
|
|
key: train_fscore
|
|
value: [0.88142292 0.87548638 0.87920792 0.86939571 0.87573964 0.89108911
|
|
0.86561265 0.87795276 0.87573964 0.87698413]
|
|
|
|
mean value: 0.8768630868479899
|
|
|
|
key: test_precision
|
|
value: [0.7037037 0.70967742 0.80769231 0.80769231 0.6875 0.8
|
|
0.86666667 0.77777778 0.81481481 0.82758621]
|
|
|
|
mean value: 0.7803111204598969
|
|
|
|
key: train_precision
|
|
value: [0.86434109 0.84586466 0.86381323 0.84150943 0.85714286 0.87209302
|
|
0.84555985 0.85440613 0.85384615 0.85992218]
|
|
|
|
mean value: 0.8558498599520898
|
|
|
|
key: test_recall
|
|
value: [0.7037037 0.81481481 0.77777778 0.77777778 0.81481481 0.85714286
|
|
0.92857143 0.75 0.78571429 0.85714286]
|
|
|
|
mean value: 0.8067460317460318
|
|
|
|
key: train_recall
|
|
value: [0.89919355 0.90725806 0.89516129 0.89919355 0.89516129 0.91093117
|
|
0.88663968 0.90283401 0.89878543 0.89473684]
|
|
|
|
mean value: 0.8989894867441557
|
|
|
|
key: test_roc_auc
|
|
value: [0.70899471 0.74669312 0.79960317 0.79960317 0.72883598 0.81746032
|
|
0.89021164 0.76388889 0.80026455 0.83597884]
|
|
|
|
mean value: 0.7891534391534392
|
|
|
|
key: train_roc_auc
|
|
value: [0.87874657 0.87063308 0.87673044 0.86457653 0.87268186 0.88893333
|
|
0.86267468 0.8748041 0.87277981 0.87478778]
|
|
|
|
mean value: 0.8737348178137652
|
|
|
|
key: test_jcc
|
|
value: [0.54285714 0.61111111 0.65625 0.65625 0.59459459 0.70588235
|
|
0.8125 0.61764706 0.66666667 0.72727273]
|
|
|
|
mean value: 0.6591031654266949
|
|
|
|
key: train_jcc
|
|
value: [0.78798587 0.77854671 0.7844523 0.76896552 0.77894737 0.80357143
|
|
0.7630662 0.78245614 0.77894737 0.78091873]
|
|
|
|
mean value: 0.7807857628358514
|
|
|
|
MCC on Blind test: 0.39
|
|
|
|
Accuracy on Blind test: 0.7
|
|
|
|
Model_name: Logistic RegressionCV
|
|
Model func: LogisticRegressionCV(random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegressionCV(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.88894773 0.93776298 1.03471494 1.2528863 3.10576725 3.06497574
|
|
2.95477295 2.19183493 2.09952784 2.15640903]
|
|
|
|
mean value: 1.9687599658966064
|
|
|
|
key: score_time
|
|
value: [0.01816058 0.01642561 0.01566052 0.02913046 0.06285167 0.05462337
|
|
0.03699875 0.0217731 0.03647137 0.02854633]
|
|
|
|
mean value: 0.032064175605773924
|
|
|
|
key: test_mcc
|
|
value: [0.42094935 0.67328042 0.64214885 0.63624339 0.63624339 0.71588202
|
|
0.75724019 0.63745526 0.67284827 0.67284827]
|
|
|
|
mean value: 0.6465139398537825
|
|
|
|
key: train_mcc
|
|
value: [0.87889823 0.87923896 0.9076639 0.85478898 0.8668376 0.81482109
|
|
0.89521728 0.88733922 0.86334179 0.87890613]
|
|
|
|
mean value: 0.8727053173033449
|
|
|
|
key: test_accuracy
|
|
value: [0.70909091 0.83636364 0.81818182 0.81818182 0.81818182 0.85454545
|
|
0.87272727 0.81818182 0.83636364 0.83636364]
|
|
|
|
mean value: 0.8218181818181818
|
|
|
|
key: train_accuracy
|
|
value: [0.93939394 0.93939394 0.95353535 0.92727273 0.93333333 0.90707071
|
|
0.94747475 0.94343434 0.93131313 0.93939394]
|
|
|
|
mean value: 0.9361616161616162
|
|
|
|
key: test_fscore
|
|
value: [0.68 0.83636364 0.8 0.81481481 0.81481481 0.86666667
|
|
0.8852459 0.82758621 0.84210526 0.84210526]
|
|
|
|
mean value: 0.8209702567511618
|
|
|
|
key: train_fscore
|
|
value: [0.94 0.94047619 0.95445545 0.92828685 0.93413174 0.90873016
|
|
0.948 0.94422311 0.93253968 0.93975904]
|
|
|
|
mean value: 0.9370602210121474
|
|
|
|
key: test_precision
|
|
value: [0.73913043 0.82142857 0.86956522 0.81481481 0.81481481 0.8125
|
|
0.81818182 0.8 0.82758621 0.82758621]
|
|
|
|
mean value: 0.8145608085207036
|
|
|
|
key: train_precision
|
|
value: [0.93253968 0.92578125 0.93774319 0.91732283 0.92490119 0.89105058
|
|
0.93675889 0.92941176 0.91439689 0.93227092]
|
|
|
|
mean value: 0.9242177188755878
|
|
|
|
key: test_recall
|
|
value: [0.62962963 0.85185185 0.74074074 0.81481481 0.81481481 0.92857143
|
|
0.96428571 0.85714286 0.85714286 0.85714286]
|
|
|
|
mean value: 0.8316137566137566
|
|
|
|
key: train_recall
|
|
value: [0.94758065 0.95564516 0.97177419 0.93951613 0.94354839 0.92712551
|
|
0.95951417 0.95951417 0.951417 0.94736842]
|
|
|
|
mean value: 0.9503003787384093
|
|
|
|
key: test_roc_auc
|
|
value: [0.70767196 0.83664021 0.81679894 0.81812169 0.81812169 0.8531746
|
|
0.87103175 0.81746032 0.83597884 0.83597884]
|
|
|
|
mean value: 0.8210978835978836
|
|
|
|
key: train_roc_auc
|
|
value: [0.93937737 0.93936104 0.95349843 0.92724794 0.93331266 0.90711114
|
|
0.94749902 0.94346676 0.93135366 0.93941002]
|
|
|
|
mean value: 0.9361638043620216
|
|
|
|
key: test_jcc
|
|
value: [0.51515152 0.71875 0.66666667 0.6875 0.6875 0.76470588
|
|
0.79411765 0.70588235 0.72727273 0.72727273]
|
|
|
|
mean value: 0.6994819518716577
|
|
|
|
key: train_jcc
|
|
value: [0.88679245 0.88764045 0.91287879 0.866171 0.87640449 0.83272727
|
|
0.90114068 0.89433962 0.87360595 0.88636364]
|
|
|
|
mean value: 0.8818064352345129
|
|
|
|
MCC on Blind test: 0.36
|
|
|
|
Accuracy on Blind test: 0.68
|
|
|
|
Model_name: Gaussian NB
|
|
Model func: GaussianNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianNB())])
|
|
|
|
key: fit_time
|
|
value: [0.02516747 0.01584744 0.01584053 0.02754521 0.02742267 0.01632524
|
|
0.01665497 0.01698923 0.01694655 0.01647186]
|
|
|
|
mean value: 0.019521117210388184
|
|
|
|
key: score_time
|
|
value: [0.01397896 0.01405859 0.01394749 0.02441931 0.01508164 0.0150733
|
|
0.01505303 0.01497316 0.01505542 0.01497483]
|
|
|
|
mean value: 0.01566157341003418
|
|
|
|
key: test_mcc
|
|
value: [0.38227513 0.3452381 0.27348302 0.71049701 0.35634832 0.32272861
|
|
0.51036057 0.6005291 0.47230166 0.67328042]
|
|
|
|
mean value: 0.46470419407971714
|
|
|
|
key: train_mcc
|
|
value: [0.50782564 0.51538722 0.52454982 0.55581519 0.53952808 0.47136062
|
|
0.50752329 0.50339767 0.40161517 0.52369686]
|
|
|
|
mean value: 0.5050699560321085
|
|
|
|
key: test_accuracy
|
|
value: [0.69090909 0.67272727 0.63636364 0.85454545 0.67272727 0.65454545
|
|
0.74545455 0.8 0.72727273 0.83636364]
|
|
|
|
mean value: 0.7290909090909091
|
|
|
|
key: train_accuracy
|
|
value: [0.75353535 0.75757576 0.76161616 0.77777778 0.76969697 0.72727273
|
|
0.75353535 0.75151515 0.68282828 0.76161616]
|
|
|
|
mean value: 0.7496969696969696
|
|
|
|
key: test_fscore
|
|
value: [0.69090909 0.66666667 0.6 0.84615385 0.7 0.70769231
|
|
0.78125 0.8 0.69387755 0.83636364]
|
|
|
|
mean value: 0.7322913098805955
|
|
|
|
key: train_fscore
|
|
value: [0.76078431 0.76190476 0.77042802 0.78174603 0.77290837 0.75849732
|
|
0.75793651 0.7554672 0.59846547 0.76587302]
|
|
|
|
mean value: 0.7484010999885592
|
|
|
|
key: test_precision
|
|
value: [0.67857143 0.66666667 0.65217391 0.88 0.63636364 0.62162162
|
|
0.69444444 0.81481481 0.80952381 0.85185185]
|
|
|
|
mean value: 0.7306032186901752
|
|
|
|
key: train_precision
|
|
value: [0.74045802 0.75 0.7443609 0.76953125 0.76377953 0.67948718
|
|
0.74319066 0.7421875 0.8125 0.75097276]
|
|
|
|
mean value: 0.7496467798693562
|
|
|
|
key: test_recall
|
|
value: [0.7037037 0.66666667 0.55555556 0.81481481 0.77777778 0.82142857
|
|
0.89285714 0.78571429 0.60714286 0.82142857]
|
|
|
|
mean value: 0.7447089947089947
|
|
|
|
key: train_recall
|
|
value: [0.78225806 0.77419355 0.7983871 0.79435484 0.78225806 0.8582996
|
|
0.77327935 0.76923077 0.47368421 0.78137652]
|
|
|
|
mean value: 0.7587322058247356
|
|
|
|
key: test_roc_auc
|
|
value: [0.69113757 0.67261905 0.63492063 0.85383598 0.67460317 0.65145503
|
|
0.74272487 0.80026455 0.72949735 0.83664021]
|
|
|
|
mean value: 0.7287698412698412
|
|
|
|
key: train_roc_auc
|
|
value: [0.75347721 0.75754212 0.76154173 0.77774422 0.76967154 0.72753689
|
|
0.75357516 0.75155087 0.68240662 0.761656 ]
|
|
|
|
mean value: 0.7496702363850072
|
|
|
|
key: test_jcc
|
|
value: [0.52777778 0.5 0.42857143 0.73333333 0.53846154 0.54761905
|
|
0.64102564 0.66666667 0.53125 0.71875 ]
|
|
|
|
mean value: 0.5833455433455433
|
|
|
|
key: train_jcc
|
|
value: [0.61392405 0.61538462 0.62658228 0.64169381 0.62987013 0.61095101
|
|
0.61022364 0.60702875 0.4270073 0.62057878]
|
|
|
|
mean value: 0.6003244367660376
|
|
|
|
MCC on Blind test: 0.37
|
|
|
|
Accuracy on Blind test: 0.69
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01697063 0.01707649 0.01695323 0.02036214 0.0169692 0.01720095
|
|
0.01688051 0.01697922 0.01698852 0.01700711]
|
|
|
|
mean value: 0.01733880043029785
|
|
|
|
key: score_time
|
|
value: [0.01532507 0.01510048 0.0149703 0.01515746 0.01492929 0.01499009
|
|
0.01785684 0.01519942 0.01516271 0.01499033]
|
|
|
|
mean value: 0.015368199348449707
|
|
|
|
key: test_mcc
|
|
value: [0.24772018 0.35634832 0.49074074 0.45502646 0.17285969 0.50088476
|
|
0.6005291 0.36428053 0.52935027 0.49271566]
|
|
|
|
mean value: 0.4210455702794488
|
|
|
|
key: train_mcc
|
|
value: [0.53998213 0.50367096 0.49143523 0.51644632 0.57433926 0.50917202
|
|
0.49499616 0.52323038 0.49175236 0.49928143]
|
|
|
|
mean value: 0.5144306254235248
|
|
|
|
key: test_accuracy
|
|
value: [0.61818182 0.67272727 0.74545455 0.72727273 0.58181818 0.74545455
|
|
0.8 0.67272727 0.76363636 0.74545455]
|
|
|
|
mean value: 0.7072727272727273
|
|
|
|
key: train_accuracy
|
|
value: [0.76969697 0.75151515 0.74545455 0.75757576 0.78585859 0.75353535
|
|
0.74747475 0.76161616 0.74545455 0.74949495]
|
|
|
|
mean value: 0.7567676767676768
|
|
|
|
key: test_fscore
|
|
value: [0.6557377 0.7 0.74074074 0.72727273 0.62295082 0.77419355
|
|
0.8 0.625 0.77966102 0.74074074]
|
|
|
|
mean value: 0.7166297298680622
|
|
|
|
key: train_fscore
|
|
value: [0.77559055 0.75834971 0.7519685 0.76653696 0.79615385 0.76356589
|
|
0.74849095 0.7611336 0.7519685 0.75298805]
|
|
|
|
mean value: 0.7626746563688571
|
|
|
|
key: test_precision
|
|
value: [0.58823529 0.63636364 0.74074074 0.71428571 0.55882353 0.70588235
|
|
0.81481481 0.75 0.74193548 0.76923077]
|
|
|
|
mean value: 0.7020312335777231
|
|
|
|
key: train_precision
|
|
value: [0.75769231 0.7394636 0.73461538 0.7406015 0.76102941 0.73234201
|
|
0.744 0.7611336 0.73180077 0.74117647]
|
|
|
|
mean value: 0.7443855056909935
|
|
|
|
key: test_recall
|
|
value: [0.74074074 0.77777778 0.74074074 0.74074074 0.7037037 0.85714286
|
|
0.78571429 0.53571429 0.82142857 0.71428571]
|
|
|
|
mean value: 0.7417989417989418
|
|
|
|
key: train_recall
|
|
value: [0.79435484 0.77822581 0.77016129 0.79435484 0.83467742 0.79757085
|
|
0.75303644 0.7611336 0.77327935 0.76518219]
|
|
|
|
mean value: 0.7821976622698185
|
|
|
|
key: test_roc_auc
|
|
value: [0.62037037 0.67460317 0.74537037 0.72751323 0.58399471 0.74338624
|
|
0.80026455 0.67526455 0.76256614 0.74603175]
|
|
|
|
mean value: 0.707936507936508
|
|
|
|
key: train_roc_auc
|
|
value: [0.76964705 0.75146108 0.74540453 0.75750131 0.78575976 0.75362413
|
|
0.74748596 0.76161519 0.74551064 0.74952658]
|
|
|
|
mean value: 0.7567536241347786
|
|
|
|
key: test_jcc
|
|
value: [0.48780488 0.53846154 0.58823529 0.57142857 0.45238095 0.63157895
|
|
0.66666667 0.45454545 0.63888889 0.58823529]
|
|
|
|
mean value: 0.5618226486024568
|
|
|
|
key: train_jcc
|
|
value: [0.63344051 0.61075949 0.60252366 0.6214511 0.66134185 0.61755486
|
|
0.59807074 0.61437908 0.60252366 0.60383387]
|
|
|
|
mean value: 0.6165878833154442
|
|
|
|
MCC on Blind test: 0.32
|
|
|
|
Accuracy on Blind test: 0.66
|
|
|
|
Model_name: K-Nearest Neighbors
|
|
Model func: KNeighborsClassifier()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', KNeighborsClassifier())])
|
|
|
|
key: fit_time
|
|
value: [0.01587105 0.02888465 0.02886105 0.0239687 0.03016782 0.01510501
|
|
0.01574898 0.01591349 0.01396894 0.01432109]
|
|
|
|
mean value: 0.020281076431274414
|
|
|
|
key: score_time
|
|
value: [0.07988477 0.04567719 0.05050254 0.04945326 0.04695845 0.04616451
|
|
0.04096031 0.04128742 0.06683803 0.04233098]
|
|
|
|
mean value: 0.05100574493408203
|
|
|
|
key: test_mcc
|
|
value: [0.19973545 0.45430503 0.52777778 0.53121272 0.46384865 0.27248677
|
|
0.23904572 0.27224652 0.38145729 0.38227513]
|
|
|
|
mean value: 0.3724391055182619
|
|
|
|
key: train_mcc
|
|
value: [0.64051863 0.64610223 0.58386939 0.60410477 0.62954999 0.64523653
|
|
0.65781523 0.61659599 0.60871579 0.63323468]
|
|
|
|
mean value: 0.6265743220680544
|
|
|
|
key: test_accuracy
|
|
value: [0.6 0.72727273 0.76363636 0.76363636 0.72727273 0.63636364
|
|
0.61818182 0.63636364 0.69090909 0.69090909]
|
|
|
|
mean value: 0.6854545454545454
|
|
|
|
key: train_accuracy
|
|
value: [0.82020202 0.82222222 0.79191919 0.8020202 0.81414141 0.82222222
|
|
0.82828283 0.80808081 0.8040404 0.81616162]
|
|
|
|
mean value: 0.812929292929293
|
|
|
|
key: test_fscore
|
|
value: [0.59259259 0.71698113 0.76363636 0.77192982 0.74576271 0.64285714
|
|
0.60377358 0.65517241 0.70175439 0.69090909]
|
|
|
|
mean value: 0.6885369243160149
|
|
|
|
key: train_fscore
|
|
value: [0.82235529 0.82879377 0.79358717 0.804 0.8203125 0.82608696
|
|
0.83300589 0.8111332 0.80792079 0.82051282]
|
|
|
|
mean value: 0.8167708401907544
|
|
|
|
key: test_precision
|
|
value: [0.59259259 0.73076923 0.75 0.73333333 0.6875 0.64285714
|
|
0.64 0.63333333 0.68965517 0.7037037 ]
|
|
|
|
mean value: 0.680374450900313
|
|
|
|
key: train_precision
|
|
value: [0.81422925 0.80075188 0.78884462 0.79761905 0.79545455 0.80694981
|
|
0.80916031 0.796875 0.79069767 0.8 ]
|
|
|
|
mean value: 0.8000582130010566
|
|
|
|
key: test_recall
|
|
value: [0.59259259 0.7037037 0.77777778 0.81481481 0.81481481 0.64285714
|
|
0.57142857 0.67857143 0.71428571 0.67857143]
|
|
|
|
mean value: 0.698941798941799
|
|
|
|
key: train_recall
|
|
value: [0.83064516 0.85887097 0.7983871 0.81048387 0.84677419 0.84615385
|
|
0.8582996 0.82591093 0.82591093 0.84210526]
|
|
|
|
mean value: 0.83435418571242
|
|
|
|
key: test_roc_auc
|
|
value: [0.59986772 0.72685185 0.76388889 0.76455026 0.72883598 0.63624339
|
|
0.61904762 0.63558201 0.69047619 0.69113757]
|
|
|
|
mean value: 0.6856481481481481
|
|
|
|
key: train_roc_auc
|
|
value: [0.82018088 0.82214803 0.7919061 0.80200307 0.81407536 0.82227047
|
|
0.82834335 0.80811676 0.8040845 0.81621392]
|
|
|
|
mean value: 0.8129342431761786
|
|
|
|
key: test_jcc
|
|
value: [0.42105263 0.55882353 0.61764706 0.62857143 0.59459459 0.47368421
|
|
0.43243243 0.48717949 0.54054054 0.52777778]
|
|
|
|
mean value: 0.5282303691436818
|
|
|
|
key: train_jcc
|
|
value: [0.69830508 0.7076412 0.65780731 0.6722408 0.69536424 0.7037037
|
|
0.71380471 0.68227425 0.67774086 0.69565217]
|
|
|
|
mean value: 0.6904534333515808
|
|
|
|
MCC on Blind test: 0.16
|
|
|
|
Accuracy on Blind test: 0.59
|
|
|
|
Model_name: SVM
|
|
Model func: SVC(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SVC(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.03890347 0.03982401 0.03942442 0.03899908 0.03830123 0.05775476
|
|
0.03943229 0.03770089 0.03759575 0.03800869]
|
|
|
|
mean value: 0.04059445858001709
|
|
|
|
key: score_time
|
|
value: [0.02148795 0.02128959 0.02136207 0.02153945 0.02004528 0.03891206
|
|
0.02066422 0.01909018 0.02133226 0.02119803]
|
|
|
|
mean value: 0.022692108154296876
|
|
|
|
key: test_mcc
|
|
value: [0.31208747 0.61131498 0.6005291 0.57068493 0.33096953 0.63277357
|
|
0.76980036 0.49074074 0.60268595 0.64214885]
|
|
|
|
mean value: 0.5563735473651314
|
|
|
|
key: train_mcc
|
|
value: [0.73298707 0.69131761 0.70028987 0.6966268 0.72029093 0.70412711
|
|
0.67141406 0.6996427 0.70046338 0.71628241]
|
|
|
|
mean value: 0.7033441947158584
|
|
|
|
key: test_accuracy
|
|
value: [0.65454545 0.8 0.8 0.78181818 0.65454545 0.8
|
|
0.87272727 0.74545455 0.8 0.81818182]
|
|
|
|
mean value: 0.7727272727272727
|
|
|
|
key: train_accuracy
|
|
value: [0.86464646 0.84242424 0.84646465 0.84444444 0.85656566 0.84848485
|
|
0.83030303 0.84646465 0.84646465 0.85656566]
|
|
|
|
mean value: 0.8482828282828283
|
|
|
|
key: test_fscore
|
|
value: [0.66666667 0.81355932 0.8 0.79310345 0.6984127 0.83076923
|
|
0.88888889 0.75 0.81355932 0.83333333]
|
|
|
|
mean value: 0.7888292910414476
|
|
|
|
key: train_fscore
|
|
value: [0.87140115 0.85283019 0.85714286 0.85553471 0.86629002 0.85822306
|
|
0.84386617 0.85606061 0.85660377 0.86266925]
|
|
|
|
mean value: 0.8580621784158269
|
|
|
|
key: test_precision
|
|
value: [0.63333333 0.75 0.78571429 0.74193548 0.61111111 0.72972973
|
|
0.8 0.75 0.77419355 0.78125 ]
|
|
|
|
mean value: 0.7357267492146524
|
|
|
|
key: train_precision
|
|
value: [0.83150183 0.80141844 0.8028169 0.8 0.81272085 0.80496454
|
|
0.78006873 0.80427046 0.80212014 0.82592593]
|
|
|
|
mean value: 0.8065807818114694
|
|
|
|
key: test_recall
|
|
value: [0.7037037 0.88888889 0.81481481 0.85185185 0.81481481 0.96428571
|
|
1. 0.75 0.85714286 0.89285714]
|
|
|
|
mean value: 0.8538359788359788
|
|
|
|
key: train_recall
|
|
value: [0.91532258 0.91129032 0.91935484 0.91935484 0.92741935 0.91902834
|
|
0.91902834 0.91497976 0.91902834 0.90283401]
|
|
|
|
mean value: 0.9167640720908972
|
|
|
|
key: test_roc_auc
|
|
value: [0.65542328 0.8015873 0.80026455 0.78306878 0.65740741 0.79695767
|
|
0.87037037 0.74537037 0.7989418 0.81679894]
|
|
|
|
mean value: 0.7726190476190476
|
|
|
|
key: train_roc_auc
|
|
value: [0.86454388 0.84228484 0.8463171 0.8442928 0.85642223 0.84862707
|
|
0.83048191 0.84660278 0.84661094 0.85665894]
|
|
|
|
mean value: 0.8482842497061512
|
|
|
|
key: test_jcc
|
|
value: [0.5 0.68571429 0.66666667 0.65714286 0.53658537 0.71052632
|
|
0.8 0.6 0.68571429 0.71428571]
|
|
|
|
mean value: 0.6556635491166942
|
|
|
|
key: train_jcc
|
|
value: [0.77210884 0.74342105 0.75 0.74754098 0.7641196 0.75165563
|
|
0.72990354 0.74834437 0.74917492 0.7585034 ]
|
|
|
|
mean value: 0.751477233693424
|
|
|
|
MCC on Blind test: 0.35
|
|
|
|
Accuracy on Blind test: 0.68
|
|
|
|
Model_name: MLP
|
|
Model func: MLPClassifier(max_iter=500, random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MLPClassifier(max_iter=500, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [3.67285013 4.01725817 2.38013887 3.54680538 5.12868834 4.23011827
|
|
4.36870575 4.42736292 4.15877223 4.4367373 ]
|
|
|
|
mean value: 4.0367437362670895
|
|
|
|
key: score_time
|
|
value: [0.0136261 0.02413845 0.01305294 0.02186489 0.02716112 0.0291934
|
|
0.02203465 0.0242362 0.02214146 0.03747463]
|
|
|
|
mean value: 0.02349238395690918
|
|
|
|
key: test_mcc
|
|
value: [0.46028325 0.52715278 0.56349206 0.52715278 0.39011548 0.78961518
|
|
0.67284827 0.49271566 0.70899471 0.63745526]
|
|
|
|
mean value: 0.5769825439757948
|
|
|
|
key: train_mcc
|
|
value: [0.95575223 0.97986939 0.90110424 0.97980606 0.96780199 0.95556354
|
|
0.97178776 0.96364438 0.96780409 0.97178914]
|
|
|
|
mean value: 0.9614922811415452
|
|
|
|
key: test_accuracy
|
|
value: [0.72727273 0.76363636 0.78181818 0.76363636 0.69090909 0.89090909
|
|
0.83636364 0.74545455 0.85454545 0.81818182]
|
|
|
|
mean value: 0.7872727272727272
|
|
|
|
key: train_accuracy
|
|
value: [0.97777778 0.98989899 0.94949495 0.98989899 0.98383838 0.97777778
|
|
0.98585859 0.98181818 0.98383838 0.98585859]
|
|
|
|
mean value: 0.9806060606060606
|
|
|
|
key: test_fscore
|
|
value: [0.69387755 0.75471698 0.77777778 0.75471698 0.71186441 0.9
|
|
0.84210526 0.74074074 0.85714286 0.82758621]
|
|
|
|
mean value: 0.7860528765780043
|
|
|
|
key: train_fscore
|
|
value: [0.97759674 0.98997996 0.95126706 0.98989899 0.984 0.97777778
|
|
0.98574338 0.98181818 0.98393574 0.98591549]
|
|
|
|
mean value: 0.9807933324074231
|
|
|
|
key: test_precision
|
|
value: [0.77272727 0.76923077 0.77777778 0.76923077 0.65625 0.84375
|
|
0.82758621 0.76923077 0.85714286 0.8 ]
|
|
|
|
mean value: 0.7842926422236767
|
|
|
|
key: train_precision
|
|
value: [0.98765432 0.98406375 0.92075472 0.99190283 0.97619048 0.97580645
|
|
0.99180328 0.97983871 0.97609562 0.98 ]
|
|
|
|
mean value: 0.9764110150696008
|
|
|
|
key: test_recall
|
|
value: [0.62962963 0.74074074 0.77777778 0.74074074 0.77777778 0.96428571
|
|
0.85714286 0.71428571 0.85714286 0.85714286]
|
|
|
|
mean value: 0.7916666666666666
|
|
|
|
key: train_recall
|
|
value: [0.96774194 0.99596774 0.98387097 0.98790323 0.99193548 0.97975709
|
|
0.97975709 0.98380567 0.99190283 0.99190283]
|
|
|
|
mean value: 0.9854544860911585
|
|
|
|
key: test_roc_auc
|
|
value: [0.7255291 0.76322751 0.78174603 0.76322751 0.69246032 0.88955026
|
|
0.83597884 0.74603175 0.85449735 0.81746032]
|
|
|
|
mean value: 0.7869708994708995
|
|
|
|
key: train_roc_auc
|
|
value: [0.97779809 0.9898867 0.94942536 0.98990303 0.98382199 0.97778177
|
|
0.98584628 0.98182219 0.98385464 0.98587077]
|
|
|
|
mean value: 0.9806010839754473
|
|
|
|
key: test_jcc
|
|
value: [0.53125 0.60606061 0.63636364 0.60606061 0.55263158 0.81818182
|
|
0.72727273 0.58823529 0.75 0.70588235]
|
|
|
|
mean value: 0.6521938619945586
|
|
|
|
key: train_jcc
|
|
value: [0.9561753 0.98015873 0.9070632 0.98 0.96850394 0.95652174
|
|
0.97188755 0.96428571 0.96837945 0.97222222]
|
|
|
|
mean value: 0.9625197835476897
|
|
|
|
MCC on Blind test: 0.36
|
|
|
|
Accuracy on Blind test: 0.68
|
|
|
|
Model_name: Decision Tree
|
|
Model func: DecisionTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', DecisionTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.05610514 0.04562879 0.04057455 0.04896283 0.04259229 0.06603861
|
|
0.08728504 0.07639766 0.04477143 0.04026484]
|
|
|
|
mean value: 0.05486211776733398
|
|
|
|
key: score_time
|
|
value: [0.01496458 0.01488829 0.01476407 0.01479363 0.01489615 0.0249064
|
|
0.02455878 0.02449822 0.01278067 0.01342273]
|
|
|
|
mean value: 0.017447352409362793
|
|
|
|
key: test_mcc
|
|
value: [0.82337971 0.72754449 0.71049701 0.66332968 0.70899471 0.74935731
|
|
0.74935731 0.64214885 0.74569602 0.63745526]
|
|
|
|
mean value: 0.7157760349161901
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.90909091 0.85454545 0.85454545 0.81818182 0.85454545 0.87272727
|
|
0.87272727 0.81818182 0.87272727 0.81818182]
|
|
|
|
mean value: 0.8545454545454545
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.9122807 0.86666667 0.84615385 0.7826087 0.85185185 0.88135593
|
|
0.88135593 0.83333333 0.87719298 0.82758621]
|
|
|
|
mean value: 0.856038614917173
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.86666667 0.78787879 0.88 0.94736842 0.85185185 0.83870968
|
|
0.83870968 0.78125 0.86206897 0.8 ]
|
|
|
|
mean value: 0.8454504047805889
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.96296296 0.96296296 0.81481481 0.66666667 0.85185185 0.92857143
|
|
0.92857143 0.89285714 0.89285714 0.85714286]
|
|
|
|
mean value: 0.8759259259259259
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.91005291 0.85648148 0.85383598 0.81547619 0.85449735 0.87169312
|
|
0.87169312 0.81679894 0.8723545 0.81746032]
|
|
|
|
mean value: 0.8540343915343915
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.83870968 0.76470588 0.73333333 0.64285714 0.74193548 0.78787879
|
|
0.78787879 0.71428571 0.78125 0.70588235]
|
|
|
|
mean value: 0.7498717162818207
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.61
|
|
|
|
Accuracy on Blind test: 0.81
|
|
|
|
Model_name: Extra Trees
|
|
Model func: ExtraTreesClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreesClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.19794202 0.19755411 0.32910442 0.20575571 0.20439601 0.20041323
|
|
0.20080996 0.19847345 0.19954133 0.13866663]
|
|
|
|
mean value: 0.20726568698883058
|
|
|
|
key: score_time
|
|
value: [0.02532339 0.02595329 0.04922533 0.02565217 0.02559996 0.02538157
|
|
0.02559328 0.02610207 0.02643085 0.01795912]
|
|
|
|
mean value: 0.027322101593017577
|
|
|
|
key: test_mcc
|
|
value: [0.19920477 0.56441351 0.60425446 0.52777778 0.63624339 0.60876172
|
|
0.67284827 0.63745526 0.67284827 0.63745526]
|
|
|
|
mean value: 0.5761262684750457
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.6 0.78181818 0.8 0.76363636 0.81818182 0.8
|
|
0.83636364 0.81818182 0.83636364 0.81818182]
|
|
|
|
mean value: 0.7872727272727273
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.57692308 0.76923077 0.80701754 0.76363636 0.81481481 0.81967213
|
|
0.84210526 0.82758621 0.84210526 0.82758621]
|
|
|
|
mean value: 0.7890677639721108
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.6 0.8 0.76666667 0.75 0.81481481 0.75757576
|
|
0.82758621 0.8 0.82758621 0.8 ]
|
|
|
|
mean value: 0.7744229652850343
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.55555556 0.74074074 0.85185185 0.77777778 0.81481481 0.89285714
|
|
0.85714286 0.85714286 0.85714286 0.85714286]
|
|
|
|
mean value: 0.8062169312169312
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.59920635 0.78108466 0.80092593 0.76388889 0.81812169 0.79828042
|
|
0.83597884 0.81746032 0.83597884 0.81746032]
|
|
|
|
mean value: 0.7868386243386243
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.40540541 0.625 0.67647059 0.61764706 0.6875 0.69444444
|
|
0.72727273 0.70588235 0.72727273 0.70588235]
|
|
|
|
mean value: 0.6572777657336482
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.37
|
|
|
|
Accuracy on Blind test: 0.69
|
|
|
|
Model_name: Extra Tree
|
|
Model func: ExtraTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01045251 0.01042056 0.01042318 0.01044345 0.01047707 0.01031661
|
|
0.01040959 0.01038814 0.01063156 0.010463 ]
|
|
|
|
mean value: 0.010442566871643067
|
|
|
|
key: score_time
|
|
value: [0.00888085 0.00891566 0.00893712 0.00892591 0.00898528 0.00884223
|
|
0.0088532 0.00897121 0.00899124 0.00891328]
|
|
|
|
mean value: 0.008921599388122559
|
|
|
|
key: test_mcc
|
|
value: [0.3452381 0.45502646 0.30934323 0.41798942 0.35067789 0.35739146
|
|
0.27734221 0.34702017 0.27417243 0.05562919]
|
|
|
|
mean value: 0.3189830546759568
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.67272727 0.72727273 0.65454545 0.70909091 0.67272727 0.67272727
|
|
0.63636364 0.67272727 0.63636364 0.52727273]
|
|
|
|
mean value: 0.6581818181818182
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.66666667 0.72727273 0.62745098 0.7037037 0.68965517 0.71875
|
|
0.61538462 0.66666667 0.62962963 0.51851852]
|
|
|
|
mean value: 0.6563698680648478
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.66666667 0.71428571 0.66666667 0.7037037 0.64516129 0.63888889
|
|
0.66666667 0.69230769 0.65384615 0.53846154]
|
|
|
|
mean value: 0.6586654981816272
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.66666667 0.74074074 0.59259259 0.7037037 0.74074074 0.82142857
|
|
0.57142857 0.64285714 0.60714286 0.5 ]
|
|
|
|
mean value: 0.6587301587301587
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.67261905 0.72751323 0.65343915 0.70899471 0.6739418 0.66997354
|
|
0.63756614 0.67328042 0.63690476 0.52777778]
|
|
|
|
mean value: 0.6582010582010582
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.5 0.57142857 0.45714286 0.54285714 0.52631579 0.56097561
|
|
0.44444444 0.5 0.45945946 0.35 ]
|
|
|
|
mean value: 0.4912623874562257
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.2
|
|
|
|
Accuracy on Blind test: 0.6
|
|
|
|
Model_name: Random Forest
|
|
Model func: RandomForestClassifier(n_estimators=1000, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(n_estimators=1000, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.92458248 1.95114827 1.93127131 1.95174503 1.92140627 1.95640922
|
|
1.95029449 1.97486663 1.9406321 1.9148674 ]
|
|
|
|
mean value: 1.941722321510315
|
|
|
|
key: score_time
|
|
value: [0.09414029 0.09904289 0.09451675 0.09384274 0.10092402 0.09558034
|
|
0.09308028 0.0980382 0.09257841 0.09364271]
|
|
|
|
mean value: 0.09553866386413574
|
|
|
|
key: test_mcc
|
|
value: [0.64402061 0.75033796 0.67284827 0.78353876 0.81854376 0.85449735
|
|
0.92962225 0.70899471 0.78174603 0.71049701]
|
|
|
|
mean value: 0.7654646714562869
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.81818182 0.87272727 0.83636364 0.89090909 0.90909091 0.92727273
|
|
0.96363636 0.85454545 0.89090909 0.85454545]
|
|
|
|
mean value: 0.8818181818181818
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.82758621 0.87719298 0.83018868 0.88461538 0.90566038 0.92857143
|
|
0.96551724 0.85714286 0.89285714 0.86206897]
|
|
|
|
mean value: 0.883140126603983
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.77419355 0.83333333 0.84615385 0.92 0.92307692 0.92857143
|
|
0.93333333 0.85714286 0.89285714 0.83333333]
|
|
|
|
mean value: 0.8741995746189295
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.88888889 0.92592593 0.81481481 0.85185185 0.88888889 0.92857143
|
|
1. 0.85714286 0.89285714 0.89285714]
|
|
|
|
mean value: 0.8941798941798942
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.81944444 0.87367725 0.83597884 0.89021164 0.90873016 0.92724868
|
|
0.96296296 0.85449735 0.89087302 0.85383598]
|
|
|
|
mean value: 0.8817460317460317
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.70588235 0.78125 0.70967742 0.79310345 0.82758621 0.86666667
|
|
0.93333333 0.75 0.80645161 0.75757576]
|
|
|
|
mean value: 0.7931526797947412
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.6
|
|
|
|
Accuracy on Blind test: 0.8
|
|
|
|
Model_name: Random Forest2
|
|
Model func: RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000...05', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10,
|
|
oob_score=True, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.05946684 1.08697295 1.0074203 1.05800724 1.03038883 0.98789191
|
|
1.08265114 0.95895958 1.01394844 1.01338744]
|
|
|
|
mean value: 1.0299094676971436
|
|
|
|
key: score_time
|
|
value: [0.1864717 0.17948318 0.18233967 0.19377112 0.17400479 0.18085146
|
|
0.15881896 0.18842578 0.19034266 0.13840103]
|
|
|
|
mean value: 0.17729103565216064
|
|
|
|
key: test_mcc
|
|
value: [0.67729621 0.78410665 0.63745526 0.81854376 0.81878307 0.89139151
|
|
0.92962225 0.78174603 0.78353876 0.71049701]
|
|
|
|
mean value: 0.7832980506929874
|
|
|
|
key: train_mcc
|
|
value: [0.91575068 0.92383747 0.93168168 0.92802443 0.92754149 0.91967737
|
|
0.93193417 0.92776191 0.93193417 0.93946419]
|
|
|
|
mean value: 0.927760755706162
|
|
|
|
key: test_accuracy
|
|
value: [0.83636364 0.89090909 0.81818182 0.90909091 0.90909091 0.94545455
|
|
0.96363636 0.89090909 0.89090909 0.85454545]
|
|
|
|
mean value: 0.8909090909090909
|
|
|
|
key: train_accuracy
|
|
value: [0.95757576 0.96161616 0.96565657 0.96363636 0.96363636 0.95959596
|
|
0.96565657 0.96363636 0.96565657 0.96969697]
|
|
|
|
mean value: 0.9636363636363636
|
|
|
|
key: test_fscore
|
|
value: [0.84210526 0.89285714 0.80769231 0.90566038 0.90909091 0.94736842
|
|
0.96551724 0.89285714 0.89655172 0.86206897]
|
|
|
|
mean value: 0.8921769495101002
|
|
|
|
key: train_fscore
|
|
value: [0.95841584 0.96237624 0.96620278 0.96442688 0.96414343 0.96015936
|
|
0.96620278 0.96414343 0.96620278 0.96981891]
|
|
|
|
mean value: 0.9642092435199201
|
|
|
|
key: test_precision
|
|
value: [0.8 0.86206897 0.84 0.92307692 0.89285714 0.93103448
|
|
0.93333333 0.89285714 0.86666667 0.83333333]
|
|
|
|
mean value: 0.8775227990400405
|
|
|
|
key: train_precision
|
|
value: [0.94163424 0.94552529 0.95294118 0.94573643 0.95275591 0.94509804
|
|
0.94921875 0.94901961 0.94921875 0.964 ]
|
|
|
|
mean value: 0.949514819622368
|
|
|
|
key: test_recall
|
|
value: [0.88888889 0.92592593 0.77777778 0.88888889 0.92592593 0.96428571
|
|
1. 0.89285714 0.92857143 0.89285714]
|
|
|
|
mean value: 0.9085978835978836
|
|
|
|
key: train_recall
|
|
value: [0.97580645 0.97983871 0.97983871 0.98387097 0.97580645 0.9757085
|
|
0.98380567 0.97975709 0.98380567 0.9757085 ]
|
|
|
|
mean value: 0.9793946715423796
|
|
|
|
key: test_roc_auc
|
|
value: [0.83730159 0.89153439 0.81746032 0.90873016 0.90939153 0.94510582
|
|
0.96296296 0.89087302 0.89021164 0.85383598]
|
|
|
|
mean value: 0.8907407407407407
|
|
|
|
key: train_roc_auc
|
|
value: [0.95753885 0.96157927 0.96562786 0.9635954 0.96361173 0.95962844
|
|
0.96569316 0.96366887 0.96569316 0.96970909]
|
|
|
|
mean value: 0.9636345827347526
|
|
|
|
key: test_jcc
|
|
value: [0.72727273 0.80645161 0.67741935 0.82758621 0.83333333 0.9
|
|
0.93333333 0.80645161 0.8125 0.75757576]
|
|
|
|
mean value: 0.8081923939056864
|
|
|
|
key: train_jcc
|
|
value: [0.92015209 0.92748092 0.93461538 0.93129771 0.93076923 0.92337165
|
|
0.93461538 0.93076923 0.93461538 0.94140625]
|
|
|
|
mean value: 0.9309093230103145
|
|
|
|
MCC on Blind test: 0.63
|
|
|
|
Accuracy on Blind test: 0.82
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.0125699 0.01251554 0.01251912 0.01254106 0.01248789 0.01241875
|
|
0.01253629 0.01246715 0.01252913 0.01242185]
|
|
|
|
mean value: 0.012500667572021484
|
|
|
|
key: score_time
|
|
value: [0.01050663 0.01048708 0.0106113 0.01052618 0.01047444 0.01048017
|
|
0.01057601 0.01042986 0.01046348 0.01046515]
|
|
|
|
mean value: 0.010502028465270995
|
|
|
|
key: test_mcc
|
|
value: [0.24772018 0.35634832 0.49074074 0.45502646 0.17285969 0.50088476
|
|
0.6005291 0.36428053 0.52935027 0.49271566]
|
|
|
|
mean value: 0.4210455702794488
|
|
|
|
key: train_mcc
|
|
value: [0.53998213 0.50367096 0.49143523 0.51644632 0.57433926 0.50917202
|
|
0.49499616 0.52323038 0.49175236 0.49928143]
|
|
|
|
mean value: 0.5144306254235248
|
|
|
|
key: test_accuracy
|
|
value: [0.61818182 0.67272727 0.74545455 0.72727273 0.58181818 0.74545455
|
|
0.8 0.67272727 0.76363636 0.74545455]
|
|
|
|
mean value: 0.7072727272727273
|
|
|
|
key: train_accuracy
|
|
value: [0.76969697 0.75151515 0.74545455 0.75757576 0.78585859 0.75353535
|
|
0.74747475 0.76161616 0.74545455 0.74949495]
|
|
|
|
mean value: 0.7567676767676768
|
|
|
|
key: test_fscore
|
|
value: [0.6557377 0.7 0.74074074 0.72727273 0.62295082 0.77419355
|
|
0.8 0.625 0.77966102 0.74074074]
|
|
|
|
mean value: 0.7166297298680622
|
|
|
|
key: train_fscore
|
|
value: [0.77559055 0.75834971 0.7519685 0.76653696 0.79615385 0.76356589
|
|
0.74849095 0.7611336 0.7519685 0.75298805]
|
|
|
|
mean value: 0.7626746563688571
|
|
|
|
key: test_precision
|
|
value: [0.58823529 0.63636364 0.74074074 0.71428571 0.55882353 0.70588235
|
|
0.81481481 0.75 0.74193548 0.76923077]
|
|
|
|
mean value: 0.7020312335777231
|
|
|
|
key: train_precision
|
|
value: [0.75769231 0.7394636 0.73461538 0.7406015 0.76102941 0.73234201
|
|
0.744 0.7611336 0.73180077 0.74117647]
|
|
|
|
mean value: 0.7443855056909935
|
|
|
|
key: test_recall
|
|
value: [0.74074074 0.77777778 0.74074074 0.74074074 0.7037037 0.85714286
|
|
0.78571429 0.53571429 0.82142857 0.71428571]
|
|
|
|
mean value: 0.7417989417989418
|
|
|
|
key: train_recall
|
|
value: [0.79435484 0.77822581 0.77016129 0.79435484 0.83467742 0.79757085
|
|
0.75303644 0.7611336 0.77327935 0.76518219]
|
|
|
|
mean value: 0.7821976622698185
|
|
|
|
key: test_roc_auc
|
|
value: [0.62037037 0.67460317 0.74537037 0.72751323 0.58399471 0.74338624
|
|
0.80026455 0.67526455 0.76256614 0.74603175]
|
|
|
|
mean value: 0.707936507936508
|
|
|
|
key: train_roc_auc
|
|
value: [0.76964705 0.75146108 0.74540453 0.75750131 0.78575976 0.75362413
|
|
0.74748596 0.76161519 0.74551064 0.74952658]
|
|
|
|
mean value: 0.7567536241347786
|
|
|
|
key: test_jcc
|
|
value: [0.48780488 0.53846154 0.58823529 0.57142857 0.45238095 0.63157895
|
|
0.66666667 0.45454545 0.63888889 0.58823529]
|
|
|
|
mean value: 0.5618226486024568
|
|
|
|
key: train_jcc
|
|
value: [0.63344051 0.61075949 0.60252366 0.6214511 0.66134185 0.61755486
|
|
0.59807074 0.61437908 0.60252366 0.60383387]
|
|
|
|
mean value: 0.6165878833154442
|
|
|
|
MCC on Blind test: 0.32
|
|
|
|
Accuracy on Blind test: 0.66
|
|
|
|
Model_name: XGBoost
|
|
Model func: XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000...
|
|
interaction_constraints=None, learning_rate=None,
|
|
max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan,
|
|
monotone_constraints=None, n_estimators=100,
|
|
n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None,
|
|
reg_lambda=None, scale_pos_weight=None,
|
|
subsample=None, tree_method=None,
|
|
use_label_encoder=False,
|
|
validate_parameters=None, verbosity=0))])
|
|
|
|
key: fit_time
|
|
value: [0.62707305 0.62119603 1.14749575 0.59021926 0.94702363 0.77251482
|
|
0.71458483 1.27654052 0.6743052 1.20825052]
|
|
|
|
mean value: 0.8579203605651855
|
|
|
|
key: score_time
|
|
value: [0.01454377 0.01678991 0.01452255 0.01261163 0.01532197 0.01275706
|
|
0.01505327 0.01521182 0.01253653 0.01447082]
|
|
|
|
mean value: 0.014381933212280273
|
|
|
|
key: test_mcc
|
|
value: [0.85695439 0.78410665 0.74935731 0.83147942 0.8565805 0.92724868
|
|
0.92962225 0.8565805 0.78174603 0.74935731]
|
|
|
|
mean value: 0.8323033055110775
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.92727273 0.89090909 0.87272727 0.90909091 0.92727273 0.96363636
|
|
0.96363636 0.92727273 0.89090909 0.87272727]
|
|
|
|
mean value: 0.9145454545454546
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.92857143 0.89285714 0.8627451 0.89795918 0.92307692 0.96428571
|
|
0.96551724 0.93103448 0.89285714 0.88135593]
|
|
|
|
mean value: 0.9140260289702358
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.89655172 0.86206897 0.91666667 1. 0.96 0.96428571
|
|
0.93333333 0.9 0.89285714 0.83870968]
|
|
|
|
mean value: 0.9164473224217384
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.96296296 0.92592593 0.81481481 0.81481481 0.88888889 0.96428571
|
|
1. 0.96428571 0.89285714 0.92857143]
|
|
|
|
mean value: 0.9157407407407407
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.92791005 0.89153439 0.87169312 0.90740741 0.9265873 0.96362434
|
|
0.96296296 0.9265873 0.89087302 0.87169312]
|
|
|
|
mean value: 0.9140873015873017
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.86666667 0.80645161 0.75862069 0.81481481 0.85714286 0.93103448
|
|
0.93333333 0.87096774 0.80645161 0.78787879]
|
|
|
|
mean value: 0.8433362599992188
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.68
|
|
|
|
Accuracy on Blind test: 0.84
|
|
|
|
Model_name: LDA
|
|
Model func: LinearDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LinearDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.05233717 0.07362056 0.0506649 0.08480668 0.09400225 0.07897758
|
|
0.07774997 0.07809353 0.07805419 0.0787487 ]
|
|
|
|
mean value: 0.07470555305480957
|
|
|
|
key: score_time
|
|
value: [0.01890683 0.01224971 0.01779628 0.01238489 0.01915932 0.01917076
|
|
0.01915264 0.0190208 0.02379823 0.01930976]
|
|
|
|
mean value: 0.018094921112060548
|
|
|
|
key: test_mcc
|
|
value: [0.38227513 0.6005291 0.60000053 0.52715278 0.63690774 0.68300095
|
|
0.60000053 0.60000053 0.63745526 0.67284827]
|
|
|
|
mean value: 0.5940170818135115
|
|
|
|
key: train_mcc
|
|
value: [0.85942197 0.84439654 0.85205281 0.82738586 0.83547873 0.84299263
|
|
0.83883199 0.85169575 0.8436046 0.85480318]
|
|
|
|
mean value: 0.8450664056592706
|
|
|
|
key: test_accuracy
|
|
value: [0.69090909 0.8 0.8 0.76363636 0.8 0.83636364
|
|
0.8 0.8 0.81818182 0.83636364]
|
|
|
|
mean value: 0.7945454545454546
|
|
|
|
key: train_accuracy
|
|
value: [0.92929293 0.92121212 0.92525253 0.91313131 0.91717172 0.92121212
|
|
0.91919192 0.92525253 0.92121212 0.92727273]
|
|
|
|
mean value: 0.9220202020202021
|
|
|
|
key: test_fscore
|
|
value: [0.69090909 0.8 0.79245283 0.75471698 0.82539683 0.85245902
|
|
0.80701754 0.80701754 0.82758621 0.84210526]
|
|
|
|
mean value: 0.7999661301793858
|
|
|
|
key: train_fscore
|
|
value: [0.93096647 0.92397661 0.92759295 0.91552063 0.9194499 0.92246521
|
|
0.92031873 0.9270217 0.92307692 0.928 ]
|
|
|
|
mean value: 0.923838911623373
|
|
|
|
key: test_precision
|
|
value: [0.67857143 0.78571429 0.80769231 0.76923077 0.72222222 0.78787879
|
|
0.79310345 0.79310345 0.8 0.82758621]
|
|
|
|
mean value: 0.7765102904758078
|
|
|
|
key: train_precision
|
|
value: [0.91119691 0.89433962 0.90114068 0.89272031 0.89655172 0.90625
|
|
0.90588235 0.90384615 0.9 0.91699605]
|
|
|
|
mean value: 0.9028923803118568
|
|
|
|
key: test_recall
|
|
value: [0.7037037 0.81481481 0.77777778 0.74074074 0.96296296 0.92857143
|
|
0.82142857 0.82142857 0.85714286 0.85714286]
|
|
|
|
mean value: 0.8285714285714285
|
|
|
|
key: train_recall
|
|
value: [0.9516129 0.95564516 0.95564516 0.93951613 0.94354839 0.93927126
|
|
0.93522267 0.951417 0.94736842 0.93927126]
|
|
|
|
mean value: 0.9458518349222933
|
|
|
|
key: test_roc_auc
|
|
value: [0.69113757 0.80026455 0.79960317 0.76322751 0.80291005 0.83465608
|
|
0.79960317 0.79960317 0.81746032 0.83597884]
|
|
|
|
mean value: 0.7944444444444445
|
|
|
|
key: train_roc_auc
|
|
value: [0.92924775 0.92114242 0.925191 0.9130779 0.91711832 0.92124853
|
|
0.91922424 0.92530528 0.92126486 0.92729692]
|
|
|
|
mean value: 0.9220117213007705
|
|
|
|
key: test_jcc
|
|
value: [0.52777778 0.66666667 0.65625 0.60606061 0.7027027 0.74285714
|
|
0.67647059 0.67647059 0.70588235 0.72727273]
|
|
|
|
mean value: 0.6688411152749388
|
|
|
|
key: train_jcc
|
|
value: [0.87084871 0.85869565 0.8649635 0.8442029 0.85090909 0.85608856
|
|
0.85239852 0.86397059 0.85714286 0.86567164]
|
|
|
|
mean value: 0.8584892025810493
|
|
|
|
MCC on Blind test: 0.37
|
|
|
|
Accuracy on Blind test: 0.68
|
|
|
|
Model_name: Multinomial
|
|
Model func: MultinomialNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MultinomialNB())])
|
|
|
|
key: fit_time
|
|
value: [0.02402616 0.0115273 0.01063681 0.01021409 0.01022005 0.01077056
|
|
0.01057315 0.01097369 0.01075101 0.01064754]
|
|
|
|
mean value: 0.012034034729003907
|
|
|
|
key: score_time
|
|
value: [0.01027036 0.00953054 0.01007247 0.00907731 0.00901937 0.00954294
|
|
0.00909209 0.00991511 0.00939345 0.00938368]
|
|
|
|
mean value: 0.009529733657836914
|
|
|
|
key: test_mcc
|
|
value: [0.35067789 0.49271566 0.30934323 0.7112589 0.33096953 0.54298418
|
|
0.57574525 0.53121272 0.56841568 0.63624339]
|
|
|
|
mean value: 0.504956642941674
|
|
|
|
key: train_mcc
|
|
value: [0.54145344 0.53948473 0.50257265 0.54882058 0.56238183 0.51691863
|
|
0.50956573 0.51728351 0.55283686 0.5315745 ]
|
|
|
|
mean value: 0.5322892455982188
|
|
|
|
key: test_accuracy
|
|
value: [0.67272727 0.74545455 0.65454545 0.85454545 0.65454545 0.76363636
|
|
0.78181818 0.76363636 0.78181818 0.81818182]
|
|
|
|
mean value: 0.7490909090909091
|
|
|
|
key: train_accuracy
|
|
value: [0.76969697 0.76767677 0.74949495 0.77171717 0.77979798 0.75757576
|
|
0.75353535 0.75757576 0.77373737 0.76363636]
|
|
|
|
mean value: 0.7644444444444445
|
|
|
|
key: test_fscore
|
|
value: [0.68965517 0.75 0.62745098 0.85714286 0.6984127 0.79365079
|
|
0.80645161 0.75471698 0.8 0.82142857]
|
|
|
|
mean value: 0.7598909667476172
|
|
|
|
key: train_fscore
|
|
value: [0.77992278 0.78178368 0.76425856 0.78719397 0.79078695 0.76653696
|
|
0.76447876 0.76744186 0.78787879 0.77714286]
|
|
|
|
mean value: 0.7767425173027587
|
|
|
|
key: test_precision
|
|
value: [0.64516129 0.72413793 0.66666667 0.82758621 0.61111111 0.71428571
|
|
0.73529412 0.8 0.75 0.82142857]
|
|
|
|
mean value: 0.7295671609392738
|
|
|
|
key: train_precision
|
|
value: [0.74814815 0.73835125 0.72302158 0.7385159 0.75457875 0.73782772
|
|
0.73062731 0.73605948 0.74021352 0.73381295]
|
|
|
|
mean value: 0.7381156614955805
|
|
|
|
key: test_recall
|
|
value: [0.74074074 0.77777778 0.59259259 0.88888889 0.81481481 0.89285714
|
|
0.89285714 0.71428571 0.85714286 0.82142857]
|
|
|
|
mean value: 0.7993386243386243
|
|
|
|
key: train_recall
|
|
value: [0.81451613 0.83064516 0.81048387 0.84274194 0.83064516 0.79757085
|
|
0.80161943 0.80161943 0.84210526 0.82591093]
|
|
|
|
mean value: 0.8197858168995691
|
|
|
|
key: test_roc_auc
|
|
value: [0.6739418 0.74603175 0.65343915 0.85515873 0.65740741 0.76124339
|
|
0.7797619 0.76455026 0.78042328 0.81812169]
|
|
|
|
mean value: 0.7490079365079365
|
|
|
|
key: train_roc_auc
|
|
value: [0.76960624 0.7675493 0.74937149 0.7715734 0.77969505 0.75765639
|
|
0.7536323 0.75766456 0.77387521 0.76376192]
|
|
|
|
mean value: 0.7644385856079404
|
|
|
|
key: test_jcc
|
|
value: [0.52631579 0.6 0.45714286 0.75 0.53658537 0.65789474
|
|
0.67567568 0.60606061 0.66666667 0.6969697 ]
|
|
|
|
mean value: 0.617331139468495
|
|
|
|
key: train_jcc
|
|
value: [0.63924051 0.64174455 0.61846154 0.64906832 0.65396825 0.6214511
|
|
0.61875 0.62264151 0.65 0.63551402]
|
|
|
|
mean value: 0.6350839802253374
|
|
|
|
MCC on Blind test: 0.27
|
|
|
|
Accuracy on Blind test: 0.64
|
|
|
|
Model_name: Passive Aggresive
|
|
Model func: PassiveAggressiveClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
PassiveAggressiveClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01411009 0.01898456 0.01716137 0.02354026 0.01881456 0.02513814
|
|
0.02284837 0.02281046 0.02182651 0.02141547]
|
|
|
|
mean value: 0.02066497802734375
|
|
|
|
key: score_time
|
|
value: [0.0108521 0.01172495 0.01195383 0.01198554 0.01201916 0.01236916
|
|
0.01207137 0.01207829 0.01250148 0.01204801]
|
|
|
|
mean value: 0.011960387229919434
|
|
|
|
key: test_mcc
|
|
value: [0.38227513 0.35634832 0.60000053 0.35634832 0.30197505 0.71049701
|
|
0.78410665 0.53121272 0.49939969 0.47230166]
|
|
|
|
mean value: 0.4994465070574524
|
|
|
|
key: train_mcc
|
|
value: [0.69821516 0.38020192 0.74548484 0.37955483 0.57647436 0.78586587
|
|
0.68422859 0.75572697 0.55806556 0.71471959]
|
|
|
|
mean value: 0.6278537692242491
|
|
|
|
key: test_accuracy
|
|
value: [0.69090909 0.61818182 0.8 0.61818182 0.6 0.85454545
|
|
0.89090909 0.76363636 0.72727273 0.72727273]
|
|
|
|
mean value: 0.7290909090909091
|
|
|
|
key: train_accuracy
|
|
value: [0.84848485 0.63636364 0.87070707 0.62828283 0.75353535 0.89292929
|
|
0.83232323 0.87676768 0.74545455 0.84848485]
|
|
|
|
mean value: 0.7933333333333333
|
|
|
|
key: test_fscore
|
|
value: [0.69090909 0.36363636 0.79245283 0.36363636 0.7027027 0.86206897
|
|
0.88888889 0.75471698 0.66666667 0.69387755]
|
|
|
|
mean value: 0.6779556404298481
|
|
|
|
key: train_fscore
|
|
value: [0.85322896 0.44444444 0.87739464 0.41401274 0.80130293 0.89292929
|
|
0.8091954 0.87157895 0.66489362 0.82915718]
|
|
|
|
mean value: 0.7458138148743843
|
|
|
|
key: test_precision
|
|
value: [0.67857143 1. 0.80769231 1. 0.55319149 0.83333333
|
|
0.92307692 0.8 0.88235294 0.80952381]
|
|
|
|
mean value: 0.8287742232735975
|
|
|
|
key: train_precision
|
|
value: [0.82889734 0.94736842 0.83576642 0.98484848 0.67213115 0.89112903
|
|
0.93617021 0.90789474 0.96899225 0.94791667]
|
|
|
|
mean value: 0.8921114711797615
|
|
|
|
key: test_recall
|
|
value: [0.7037037 0.22222222 0.77777778 0.22222222 0.96296296 0.89285714
|
|
0.85714286 0.71428571 0.53571429 0.60714286]
|
|
|
|
mean value: 0.6496031746031746
|
|
|
|
key: train_recall
|
|
value: [0.87903226 0.29032258 0.9233871 0.26209677 0.99193548 0.89473684
|
|
0.71255061 0.83805668 0.50607287 0.73684211]
|
|
|
|
mean value: 0.7035033302860128
|
|
|
|
key: test_roc_auc
|
|
value: [0.69113757 0.61111111 0.79960317 0.61111111 0.60648148 0.85383598
|
|
0.89153439 0.76455026 0.73082011 0.72949735]
|
|
|
|
mean value: 0.7289682539682539
|
|
|
|
key: train_roc_auc
|
|
value: [0.84842301 0.63706412 0.87060043 0.6290241 0.75305276 0.89293294
|
|
0.83208176 0.87668963 0.74497192 0.84825976]
|
|
|
|
mean value: 0.793310043097819
|
|
|
|
key: test_jcc
|
|
value: [0.52777778 0.22222222 0.65625 0.22222222 0.54166667 0.75757576
|
|
0.8 0.60606061 0.5 0.53125 ]
|
|
|
|
mean value: 0.5365025252525253
|
|
|
|
key: train_jcc
|
|
value: [0.7440273 0.28571429 0.78156997 0.26104418 0.66847826 0.80656934
|
|
0.67953668 0.77238806 0.49800797 0.70817121]
|
|
|
|
mean value: 0.6205507249572288
|
|
|
|
MCC on Blind test: 0.27
|
|
|
|
Accuracy on Blind test: 0.65
|
|
|
|
Model_name: Stochastic GDescent
|
|
Model func: SGDClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SGDClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02308059 0.02244234 0.02664232 0.02540946 0.02339721 0.02050805
|
|
0.02283049 0.02475214 0.04096031 0.03317499]
|
|
|
|
mean value: 0.02631978988647461
|
|
|
|
key: score_time
|
|
value: [0.01259327 0.02270007 0.04861331 0.01219225 0.01369572 0.01507545
|
|
0.01203299 0.01277637 0.02333689 0.01207447]
|
|
|
|
mean value: 0.01850907802581787
|
|
|
|
key: test_mcc
|
|
value: [0.50088476 0.60242771 0.57218881 0.57381904 0.49074074 0.68300095
|
|
0.62360956 0.49939969 0.41825096 0.71049701]
|
|
|
|
mean value: 0.5674819225572761
|
|
|
|
key: train_mcc
|
|
value: [0.72700388 0.63669365 0.69771673 0.6215181 0.73959527 0.78120151
|
|
0.63032393 0.67006133 0.54885015 0.82660248]
|
|
|
|
mean value: 0.687956702695701
|
|
|
|
key: test_accuracy
|
|
value: [0.74545455 0.78181818 0.76363636 0.74545455 0.74545455 0.83636364
|
|
0.78181818 0.72727273 0.67272727 0.85454545]
|
|
|
|
mean value: 0.7654545454545455
|
|
|
|
key: train_accuracy
|
|
value: [0.86060606 0.8020202 0.83838384 0.78383838 0.86868687 0.88686869
|
|
0.78585859 0.82222222 0.73333333 0.91313131]
|
|
|
|
mean value: 0.8294949494949495
|
|
|
|
key: test_fscore
|
|
value: [0.70833333 0.72727273 0.69767442 0.79411765 0.74074074 0.85245902
|
|
0.82352941 0.66666667 0.75 0.86206897]
|
|
|
|
mean value: 0.7622862927352332
|
|
|
|
key: train_fscore
|
|
value: [0.8516129 0.76555024 0.81651376 0.82077052 0.86373166 0.89393939
|
|
0.82274247 0.79342723 0.78846154 0.91417166]
|
|
|
|
mean value: 0.833092137342651
|
|
|
|
key: test_precision
|
|
value: [0.80952381 0.94117647 0.9375 0.65853659 0.74074074 0.78787879
|
|
0.7 0.88235294 0.61363636 0.83333333]
|
|
|
|
mean value: 0.7904679032243594
|
|
|
|
key: train_precision
|
|
value: [0.9124424 0.94117647 0.94680851 0.70200573 0.89956332 0.83985765
|
|
0.7008547 0.94413408 0.65251989 0.9015748 ]
|
|
|
|
mean value: 0.8440937554337569
|
|
|
|
key: test_recall
|
|
value: [0.62962963 0.59259259 0.55555556 1. 0.74074074 0.92857143
|
|
1. 0.53571429 0.96428571 0.89285714]
|
|
|
|
mean value: 0.783994708994709
|
|
|
|
key: train_recall
|
|
value: [0.7983871 0.64516129 0.71774194 0.98790323 0.83064516 0.95546559
|
|
0.99595142 0.68421053 0.99595142 0.92712551]
|
|
|
|
mean value: 0.8538543163118715
|
|
|
|
key: test_roc_auc
|
|
value: [0.74338624 0.77843915 0.75992063 0.75 0.74537037 0.83465608
|
|
0.77777778 0.73082011 0.66732804 0.85383598]
|
|
|
|
mean value: 0.7641534391534391
|
|
|
|
key: train_roc_auc
|
|
value: [0.86073201 0.80233773 0.83862805 0.7834253 0.86876388 0.88700699
|
|
0.78628216 0.82194397 0.73386281 0.91315953]
|
|
|
|
mean value: 0.8296142418701841
|
|
|
|
key: test_jcc
|
|
value: [0.5483871 0.57142857 0.53571429 0.65853659 0.58823529 0.74285714
|
|
0.7 0.5 0.6 0.75757576]
|
|
|
|
mean value: 0.6202734733833452
|
|
|
|
key: train_jcc
|
|
value: [0.74157303 0.62015504 0.68992248 0.69602273 0.7601476 0.80821918
|
|
0.69886364 0.65758755 0.65079365 0.84191176]
|
|
|
|
mean value: 0.7165196660419946
|
|
|
|
MCC on Blind test: 0.39
|
|
|
|
Accuracy on Blind test: 0.7
|
|
|
|
Model_name: AdaBoost Classifier
|
|
Model func: AdaBoostClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', AdaBoostClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.22894526 0.21543217 0.20463419 0.41751027 0.43829036 0.33181643
|
|
0.44828176 0.44483876 0.20829892 0.18927455]
|
|
|
|
mean value: 0.31273226737976073
|
|
|
|
key: score_time
|
|
value: [0.01846552 0.01660538 0.07777667 0.06055355 0.04227638 0.02122521
|
|
0.02133203 0.05282283 0.01568341 0.01556087]
|
|
|
|
mean value: 0.03423018455505371
|
|
|
|
key: test_mcc
|
|
value: [0.81878307 0.78410665 0.78353876 0.67602163 0.81854376 0.96423926
|
|
0.89602867 0.74935731 0.78961518 0.71588202]
|
|
|
|
mean value: 0.7996116309415696
|
|
|
|
key: train_mcc
|
|
value: [0.97172476 0.95971983 0.97588415 0.96770771 0.96780199 0.95556281
|
|
0.95959579 0.97178776 0.97575748 0.97178914]
|
|
|
|
mean value: 0.9677331418612027
|
|
|
|
key: test_accuracy
|
|
value: [0.90909091 0.89090909 0.89090909 0.83636364 0.90909091 0.98181818
|
|
0.94545455 0.87272727 0.89090909 0.85454545]
|
|
|
|
mean value: 0.8981818181818182
|
|
|
|
key: train_accuracy
|
|
value: [0.98585859 0.97979798 0.98787879 0.98383838 0.98383838 0.97777778
|
|
0.97979798 0.98585859 0.98787879 0.98585859]
|
|
|
|
mean value: 0.9838383838383838
|
|
|
|
key: test_fscore
|
|
value: [0.90909091 0.89285714 0.88461538 0.82352941 0.90566038 0.98245614
|
|
0.94915254 0.88135593 0.9 0.86666667]
|
|
|
|
mean value: 0.8995384507280448
|
|
|
|
key: train_fscore
|
|
value: [0.98591549 0.98 0.988 0.98393574 0.984 0.97768763
|
|
0.97975709 0.98574338 0.98785425 0.98591549]
|
|
|
|
mean value: 0.9838809072550014
|
|
|
|
key: test_precision
|
|
value: [0.89285714 0.86206897 0.92 0.875 0.92307692 0.96551724
|
|
0.90322581 0.83870968 0.84375 0.8125 ]
|
|
|
|
mean value: 0.8836705756701586
|
|
|
|
key: train_precision
|
|
value: [0.98393574 0.97222222 0.98015873 0.98 0.97619048 0.9796748
|
|
0.97975709 0.99180328 0.98785425 0.98 ]
|
|
|
|
mean value: 0.9811596583012197
|
|
|
|
key: test_recall
|
|
value: [0.92592593 0.92592593 0.85185185 0.77777778 0.88888889 1.
|
|
1. 0.92857143 0.96428571 0.92857143]
|
|
|
|
mean value: 0.9191798941798942
|
|
|
|
key: train_recall
|
|
value: [0.98790323 0.98790323 0.99596774 0.98790323 0.99193548 0.9757085
|
|
0.97975709 0.97975709 0.98785425 0.99190283]
|
|
|
|
mean value: 0.9866592660310827
|
|
|
|
key: test_roc_auc
|
|
value: [0.90939153 0.89153439 0.89021164 0.83531746 0.90873016 0.98148148
|
|
0.94444444 0.87169312 0.88955026 0.8531746 ]
|
|
|
|
mean value: 0.89755291005291
|
|
|
|
key: train_roc_auc
|
|
value: [0.98585445 0.97978157 0.98786241 0.98383016 0.98382199 0.97777361
|
|
0.9797979 0.98584628 0.98787874 0.98587077]
|
|
|
|
mean value: 0.9838317879064908
|
|
|
|
key: test_jcc
|
|
value: [0.83333333 0.80645161 0.79310345 0.7 0.82758621 0.96551724
|
|
0.90322581 0.78787879 0.81818182 0.76470588]
|
|
|
|
mean value: 0.8199984137653443
|
|
|
|
key: train_jcc
|
|
value: [0.97222222 0.96078431 0.97628458 0.96837945 0.96850394 0.95634921
|
|
0.96031746 0.97188755 0.976 0.97222222]
|
|
|
|
mean value: 0.9682950943665832
|
|
|
|
MCC on Blind test: 0.64
|
|
|
|
Accuracy on Blind test: 0.82
|
|
|
|
Model_name: Bagging Classifier
|
|
Model func: BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
BaggingClassifier(n_jobs=10, oob_score=True,
|
|
random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.089432 0.08917665 0.08194613 0.08701253 0.07810664 0.09168124
|
|
0.10445046 0.08291268 0.08497047 0.07707 ]
|
|
|
|
mean value: 0.08667588233947754
|
|
|
|
key: score_time
|
|
value: [0.02384543 0.0222857 0.02403498 0.02918339 0.03180027 0.03381395
|
|
0.02989078 0.03108311 0.02549171 0.018718 ]
|
|
|
|
mean value: 0.027014732360839844
|
|
|
|
key: test_mcc
|
|
value: [0.81854376 0.79069197 0.74569602 0.82269299 0.8565805 0.96428571
|
|
0.92962225 0.78353876 0.81854376 0.67284827]
|
|
|
|
mean value: 0.820304400616813
|
|
|
|
key: train_mcc
|
|
value: [0.96796267 0.97980606 0.98387018 0.96780409 0.9637079 0.98396631
|
|
0.97178914 0.97578894 0.97578973 0.95962647]
|
|
|
|
mean value: 0.973011148778528
|
|
|
|
key: test_accuracy
|
|
value: [0.90909091 0.89090909 0.87272727 0.90909091 0.92727273 0.98181818
|
|
0.96363636 0.89090909 0.90909091 0.83636364]
|
|
|
|
mean value: 0.9090909090909091
|
|
|
|
key: train_accuracy
|
|
value: [0.98383838 0.98989899 0.99191919 0.98383838 0.98181818 0.99191919
|
|
0.98585859 0.98787879 0.98787879 0.97979798]
|
|
|
|
mean value: 0.9864646464646465
|
|
|
|
key: test_fscore
|
|
value: [0.90566038 0.89655172 0.86792453 0.90196078 0.92307692 0.98181818
|
|
0.96551724 0.89655172 0.9122807 0.84210526]
|
|
|
|
mean value: 0.9093447449436661
|
|
|
|
key: train_fscore
|
|
value: [0.98367347 0.98989899 0.99196787 0.98373984 0.98174442 0.99183673
|
|
0.98591549 0.98780488 0.98790323 0.9796748 ]
|
|
|
|
mean value: 0.986415971833258
|
|
|
|
key: test_precision
|
|
value: [0.92307692 0.83870968 0.88461538 0.95833333 0.96 1.
|
|
0.93333333 0.86666667 0.89655172 0.82758621]
|
|
|
|
mean value: 0.9088873249479479
|
|
|
|
key: train_precision
|
|
value: [0.99586777 0.99190283 0.988 0.99180328 0.9877551 1.
|
|
0.98 0.99183673 0.98393574 0.98367347]
|
|
|
|
mean value: 0.9894774930386
|
|
|
|
key: test_recall
|
|
value: [0.88888889 0.96296296 0.85185185 0.85185185 0.88888889 0.96428571
|
|
1. 0.92857143 0.92857143 0.85714286]
|
|
|
|
mean value: 0.9123015873015873
|
|
|
|
key: train_recall
|
|
value: [0.97177419 0.98790323 0.99596774 0.97580645 0.97580645 0.98380567
|
|
0.99190283 0.98380567 0.99190283 0.9757085 ]
|
|
|
|
mean value: 0.9834383570589004
|
|
|
|
key: test_roc_auc
|
|
value: [0.90873016 0.89219577 0.8723545 0.90806878 0.9265873 0.98214286
|
|
0.96296296 0.89021164 0.90873016 0.83597884]
|
|
|
|
mean value: 0.9087962962962963
|
|
|
|
key: train_roc_auc
|
|
value: [0.98386281 0.98990303 0.991911 0.98385464 0.98183035 0.99190283
|
|
0.98587077 0.98787058 0.9878869 0.97978973]
|
|
|
|
mean value: 0.9864682643332898
|
|
|
|
key: test_jcc
|
|
value: [0.82758621 0.8125 0.76666667 0.82142857 0.85714286 0.96428571
|
|
0.93333333 0.8125 0.83870968 0.72727273]
|
|
|
|
mean value: 0.8361425754445777
|
|
|
|
key: train_jcc
|
|
value: [0.96787149 0.98 0.98406375 0.968 0.96414343 0.98380567
|
|
0.97222222 0.97590361 0.97609562 0.96015936]
|
|
|
|
mean value: 0.9732265142034445
|
|
|
|
MCC on Blind test: 0.58
|
|
|
|
Accuracy on Blind test: 0.79
|
|
|
|
Model_name: Gaussian Process
|
|
Model func: GaussianProcessClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianProcessClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.25664544 0.15936446 0.21874404 0.1943872 0.2169714 0.22320032
|
|
0.16605163 0.19035244 0.19505453 0.20258379]
|
|
|
|
mean value: 0.20233552455902098
|
|
|
|
key: score_time
|
|
value: [0.03570342 0.02499676 0.02505422 0.02504539 0.02501488 0.02500081
|
|
0.01541805 0.02505493 0.03012323 0.0249331 ]
|
|
|
|
mean value: 0.02563447952270508
|
|
|
|
key: test_mcc
|
|
value: [0.27248677 0.45601459 0.63624339 0.53758181 0.34702017 0.51036057
|
|
0.38227513 0.3452381 0.46028325 0.45601459]
|
|
|
|
mean value: 0.44035183787988225
|
|
|
|
key: train_mcc
|
|
value: [0.96383145 0.96009491 0.96427139 0.96383145 0.96795956 0.95594029
|
|
0.95201552 0.94810536 0.96383439 0.94785667]
|
|
|
|
mean value: 0.9587740971376391
|
|
|
|
key: test_accuracy
|
|
value: [0.63636364 0.72727273 0.81818182 0.76363636 0.67272727 0.74545455
|
|
0.69090909 0.67272727 0.72727273 0.72727273]
|
|
|
|
mean value: 0.7181818181818183
|
|
|
|
key: train_accuracy
|
|
value: [0.98181818 0.97979798 0.98181818 0.98181818 0.98383838 0.97777778
|
|
0.97575758 0.97373737 0.98181818 0.97373737]
|
|
|
|
mean value: 0.9791919191919192
|
|
|
|
key: test_fscore
|
|
value: [0.62962963 0.70588235 0.81481481 0.77966102 0.67857143 0.78125
|
|
0.69090909 0.67857143 0.75409836 0.74576271]
|
|
|
|
mean value: 0.7259150834906866
|
|
|
|
key: train_fscore
|
|
value: [0.98203593 0.98015873 0.98217822 0.98203593 0.98406375 0.97804391
|
|
0.97609562 0.97415507 0.98196393 0.9740519 ]
|
|
|
|
mean value: 0.9794782972639188
|
|
|
|
key: test_precision
|
|
value: [0.62962963 0.75 0.81481481 0.71875 0.65517241 0.69444444
|
|
0.7037037 0.67857143 0.6969697 0.70967742]
|
|
|
|
mean value: 0.705173355128166
|
|
|
|
key: train_precision
|
|
value: [0.97233202 0.96484375 0.96498054 0.97233202 0.97244094 0.96456693
|
|
0.96078431 0.95703125 0.97222222 0.96062992]
|
|
|
|
mean value: 0.9662163907590938
|
|
|
|
key: test_recall
|
|
value: [0.62962963 0.66666667 0.81481481 0.85185185 0.7037037 0.89285714
|
|
0.67857143 0.67857143 0.82142857 0.78571429]
|
|
|
|
mean value: 0.7523809523809524
|
|
|
|
key: train_recall
|
|
value: [0.99193548 0.99596774 1. 0.99193548 0.99596774 0.99190283
|
|
0.99190283 0.99190283 0.99190283 0.98785425]
|
|
|
|
mean value: 0.9931272038657438
|
|
|
|
key: test_roc_auc
|
|
value: [0.63624339 0.72619048 0.81812169 0.76521164 0.67328042 0.74272487
|
|
0.69113757 0.67261905 0.7255291 0.72619048]
|
|
|
|
mean value: 0.7177248677248678
|
|
|
|
key: train_roc_auc
|
|
value: [0.9817977 0.97976525 0.98178138 0.9817977 0.98381383 0.97780626
|
|
0.97579013 0.973774 0.98183851 0.97376584]
|
|
|
|
mean value: 0.9791930586391537
|
|
|
|
key: test_jcc
|
|
value: [0.45945946 0.54545455 0.6875 0.63888889 0.51351351 0.64102564
|
|
0.52777778 0.51351351 0.60526316 0.59459459]
|
|
|
|
mean value: 0.5726991092122671
|
|
|
|
key: train_jcc
|
|
value: [0.96470588 0.96108949 0.96498054 0.96470588 0.96862745 0.95703125
|
|
0.95330739 0.9496124 0.96456693 0.94941634]
|
|
|
|
mean value: 0.9598043572239974
|
|
|
|
MCC on Blind test: 0.26
|
|
|
|
Accuracy on Blind test: 0.63
|
|
|
|
Model_name: Gradient Boosting
|
|
Model func: GradientBoostingClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GradientBoostingClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.74748325 0.74482059 0.7401557 0.73949742 1.064147 1.76007342
|
|
1.68261003 1.70932341 0.93628144 0.7971375 ]
|
|
|
|
mean value: 1.092152976989746
|
|
|
|
key: score_time
|
|
value: [0.00951362 0.00931072 0.00927544 0.01322484 0.02140093 0.02280068
|
|
0.03278399 0.02068853 0.00982618 0.01017642]
|
|
|
|
mean value: 0.015900135040283203
|
|
|
|
key: test_mcc
|
|
value: [0.89153439 0.75033796 0.78353876 0.78961518 0.8565805 0.89153439
|
|
0.92962225 0.8565805 0.81854376 0.74935731]
|
|
|
|
mean value: 0.8317245016129459
|
|
|
|
key: train_mcc
|
|
value: [1. 0.99596768 0.99596768 1. 0.99596768 0.99596774
|
|
0.9878869 0.99596774 0.99596774 0.99195142]
|
|
|
|
mean value: 0.9955644571218162
|
|
|
|
key: test_accuracy
|
|
value: [0.94545455 0.87272727 0.89090909 0.89090909 0.92727273 0.94545455
|
|
0.96363636 0.92727273 0.90909091 0.87272727]
|
|
|
|
mean value: 0.9145454545454546
|
|
|
|
key: train_accuracy
|
|
value: [1. 0.9979798 0.9979798 1. 0.9979798 0.9979798
|
|
0.99393939 0.9979798 0.9979798 0.9959596 ]
|
|
|
|
mean value: 0.9977777777777778
|
|
|
|
key: test_fscore
|
|
value: [0.94545455 0.87719298 0.88461538 0.88 0.92307692 0.94545455
|
|
0.96551724 0.93103448 0.9122807 0.88135593]
|
|
|
|
mean value: 0.9145982739153246
|
|
|
|
key: train_fscore
|
|
value: [1. 0.99798793 0.99798793 1. 0.99798793 0.9979798
|
|
0.99393939 0.9979798 0.9979798 0.99593496]
|
|
|
|
mean value: 0.9977777529924559
|
|
|
|
key: test_precision
|
|
value: [0.92857143 0.83333333 0.92 0.95652174 0.96 0.96296296
|
|
0.93333333 0.9 0.89655172 0.83870968]
|
|
|
|
mean value: 0.9129984198888779
|
|
|
|
key: train_precision
|
|
value: [1. 0.99598394 0.99598394 1. 0.99598394 0.99596774
|
|
0.99193548 0.99596774 0.99596774 1. ]
|
|
|
|
mean value: 0.9967790516906335
|
|
|
|
key: test_recall
|
|
value: [0.96296296 0.92592593 0.85185185 0.81481481 0.88888889 0.92857143
|
|
1. 0.96428571 0.92857143 0.92857143]
|
|
|
|
mean value: 0.9194444444444444
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1.
|
|
0.99595142 1. 1. 0.99190283]
|
|
|
|
mean value: 0.9987854251012146
|
|
|
|
key: test_roc_auc
|
|
value: [0.9457672 0.87367725 0.89021164 0.88955026 0.9265873 0.9457672
|
|
0.96296296 0.9265873 0.90873016 0.87169312]
|
|
|
|
mean value: 0.9141534391534392
|
|
|
|
key: train_roc_auc
|
|
value: [1. 0.99797571 0.99797571 1. 0.99797571 0.99798387
|
|
0.99394345 0.99798387 0.99798387 0.99595142]
|
|
|
|
mean value: 0.9977773605850855
|
|
|
|
key: test_jcc
|
|
value: [0.89655172 0.78125 0.79310345 0.78571429 0.85714286 0.89655172
|
|
0.93333333 0.87096774 0.83870968 0.78787879]
|
|
|
|
mean value: 0.8441203579975827
|
|
|
|
key: train_jcc
|
|
value: [1. 0.99598394 0.99598394 1. 0.99598394 0.99596774
|
|
0.98795181 0.99596774 0.99596774 0.99190283]
|
|
|
|
mean value: 0.9955709674272379
|
|
|
|
MCC on Blind test: 0.66
|
|
|
|
Accuracy on Blind test: 0.83
|
|
|
|
Model_name: QDA
|
|
Model func: QuadraticDiscriminantAnalysis()
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', QuadraticDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.03661227 0.03521848 0.03492141 0.03703976 0.09292889 0.10989189
|
|
0.0939784 0.11065483 0.08957911 0.08409071]
|
|
|
|
mean value: 0.07249157428741455
|
|
|
|
key: score_time
|
|
value: [0.01298571 0.01325226 0.0166471 0.01341295 0.02369738 0.02332997
|
|
0.02798176 0.02037549 0.02094483 0.0294838 ]
|
|
|
|
mean value: 0.020211124420166017
|
|
|
|
key: test_mcc
|
|
value: [-0.00750249 0.30914201 0.3105295 0.22695994 -0.00636336 0.00509277
|
|
0.05719906 0.17063492 0.11066166 0.23968667]
|
|
|
|
mean value: 0.14160406924416138
|
|
|
|
key: train_mcc
|
|
value: [0.33960257 0.29709932 0.31297917 0.30510752 0.33587862 0.31943197
|
|
0.33452427 0.32324555 0.32703143 0.31558957]
|
|
|
|
mean value: 0.3210489986924618
|
|
|
|
key: test_accuracy
|
|
value: [0.49090909 0.61818182 0.58181818 0.56363636 0.49090909 0.50909091
|
|
0.52727273 0.56363636 0.54545455 0.58181818]
|
|
|
|
mean value: 0.5472727272727272
|
|
|
|
key: train_accuracy
|
|
value: [0.6040404 0.58181818 0.58989899 0.58585859 0.6020202 0.59191919
|
|
0.6 0.59393939 0.5959596 0.58989899]
|
|
|
|
mean value: 0.5935353535353536
|
|
|
|
key: test_fscore
|
|
value: [0.62162162 0.70422535 0.7012987 0.68421053 0.63157895 0.65822785
|
|
0.64864865 0.68421053 0.66666667 0.7012987 ]
|
|
|
|
mean value: 0.6701987539748281
|
|
|
|
key: train_fscore
|
|
value: [0.71676301 0.70554765 0.70958512 0.70756063 0.71572872 0.70977011
|
|
0.71387283 0.71079137 0.71181556 0.70875179]
|
|
|
|
mean value: 0.7110186793281063
|
|
|
|
key: test_precision
|
|
value: [0.4893617 0.56818182 0.54 0.53061224 0.48979592 0.50980392
|
|
0.52173913 0.54166667 0.53191489 0.55102041]
|
|
|
|
mean value: 0.5274096704025147
|
|
|
|
key: train_precision
|
|
value: [0.55855856 0.54505495 0.54988914 0.54746137 0.55730337 0.55011136
|
|
0.55505618 0.55133929 0.55257271 0.54888889]
|
|
|
|
mean value: 0.5516235798196619
|
|
|
|
key: test_recall
|
|
value: [0.85185185 0.92592593 1. 0.96296296 0.88888889 0.92857143
|
|
0.85714286 0.92857143 0.89285714 0.96428571]
|
|
|
|
mean value: 0.9201058201058201
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.4973545 0.62367725 0.58928571 0.5707672 0.49801587 0.50132275
|
|
0.52116402 0.55687831 0.53902116 0.57473545]
|
|
|
|
mean value: 0.5472222222222223
|
|
|
|
key: train_roc_auc
|
|
value: [0.60323887 0.58097166 0.58906883 0.58502024 0.60121457 0.59274194
|
|
0.60080645 0.59475806 0.59677419 0.59072581]
|
|
|
|
mean value: 0.5935320621653389
|
|
|
|
key: test_jcc
|
|
value: [0.45098039 0.54347826 0.54 0.52 0.46153846 0.49056604
|
|
0.48 0.52 0.5 0.54 ]
|
|
|
|
mean value: 0.5046563152300738
|
|
|
|
key: train_jcc
|
|
value: [0.55855856 0.54505495 0.54988914 0.54746137 0.55730337 0.55011136
|
|
0.55505618 0.55133929 0.55257271 0.54888889]
|
|
|
|
mean value: 0.5516235798196619
|
|
|
|
MCC on Blind test: 0.06
|
|
|
|
Accuracy on Blind test: 0.47
|
|
|
|
Model_name: Ridge Classifier
|
|
Model func: RidgeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.06199431 0.05168533 0.05722976 0.05227637 0.05509949 0.05226707
|
|
0.0589335 0.0516386 0.05015492 0.05294251]
|
|
|
|
mean value: 0.054422187805175784
|
|
|
|
key: score_time
|
|
value: [0.03927255 0.03323388 0.04001808 0.03822374 0.0384686 0.04106975
|
|
0.03821731 0.04105067 0.0257442 0.03850818]
|
|
|
|
mean value: 0.03738069534301758
|
|
|
|
key: test_mcc
|
|
value: [0.41798942 0.56349206 0.60000053 0.67328042 0.56042319 0.71588202
|
|
0.71049701 0.63745526 0.60000053 0.67284827]
|
|
|
|
mean value: 0.6151868699667262
|
|
|
|
key: train_mcc
|
|
value: [0.81929299 0.81246913 0.81734388 0.77677176 0.80436311 0.82014274
|
|
0.7750029 0.80352034 0.79584621 0.80737439]
|
|
|
|
mean value: 0.8032127453356476
|
|
|
|
key: test_accuracy
|
|
value: [0.70909091 0.78181818 0.8 0.83636364 0.76363636 0.85454545
|
|
0.85454545 0.81818182 0.8 0.83636364]
|
|
|
|
mean value: 0.8054545454545454
|
|
|
|
key: train_accuracy
|
|
value: [0.90909091 0.90505051 0.90707071 0.88686869 0.9010101 0.90909091
|
|
0.88686869 0.9010101 0.8969697 0.9030303 ]
|
|
|
|
mean value: 0.9006060606060606
|
|
|
|
key: test_fscore
|
|
value: [0.7037037 0.77777778 0.79245283 0.83636364 0.79365079 0.86666667
|
|
0.86206897 0.82758621 0.80701754 0.84210526]
|
|
|
|
mean value: 0.8109393387782594
|
|
|
|
key: train_fscore
|
|
value: [0.91159136 0.90873786 0.91119691 0.89189189 0.90485437 0.91193738
|
|
0.88976378 0.90373281 0.90019569 0.90551181]
|
|
|
|
mean value: 0.9039413864086208
|
|
|
|
key: test_precision
|
|
value: [0.7037037 0.77777778 0.80769231 0.82142857 0.69444444 0.8125
|
|
0.83333333 0.8 0.79310345 0.82758621]
|
|
|
|
mean value: 0.7871569793552552
|
|
|
|
key: train_precision
|
|
value: [0.88888889 0.87640449 0.87407407 0.85555556 0.87265918 0.88257576
|
|
0.86590038 0.8778626 0.87121212 0.88122605]
|
|
|
|
mean value: 0.8746359099919839
|
|
|
|
key: test_recall
|
|
value: [0.7037037 0.77777778 0.77777778 0.85185185 0.92592593 0.92857143
|
|
0.89285714 0.85714286 0.82142857 0.85714286]
|
|
|
|
mean value: 0.8394179894179894
|
|
|
|
key: train_recall
|
|
value: [0.93548387 0.94354839 0.9516129 0.93145161 0.93951613 0.94331984
|
|
0.91497976 0.93117409 0.93117409 0.93117409]
|
|
|
|
mean value: 0.9353434765573985
|
|
|
|
key: test_roc_auc
|
|
value: [0.70899471 0.78174603 0.79960317 0.83664021 0.76653439 0.8531746
|
|
0.85383598 0.81746032 0.79960317 0.83597884]
|
|
|
|
mean value: 0.8053571428571429
|
|
|
|
key: train_roc_auc
|
|
value: [0.90903748 0.90497257 0.90698054 0.88677844 0.90093215 0.90915992
|
|
0.88692536 0.90107092 0.89703866 0.90308704]
|
|
|
|
mean value: 0.9005983087371033
|
|
|
|
key: test_jcc
|
|
value: [0.54285714 0.63636364 0.65625 0.71875 0.65789474 0.76470588
|
|
0.75757576 0.70588235 0.67647059 0.72727273]
|
|
|
|
mean value: 0.6844022824440781
|
|
|
|
key: train_jcc
|
|
value: [0.83754513 0.83274021 0.83687943 0.80487805 0.82624113 0.8381295
|
|
0.80141844 0.82437276 0.81850534 0.82733813]
|
|
|
|
mean value: 0.8248048119583812
|
|
|
|
MCC on Blind test: 0.41
|
|
|
|
Accuracy on Blind test: 0.71
|
|
|
|
Model_name: Ridge ClassifierCV
|
|
Model func: RidgeClassifierCV(cv=10)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifierCV(cv=10))])
|
|
|
|
key: fit_time
|
|
value: [0.50260377 0.47520304 0.46782541 0.47117901 0.52218747 0.5421946
|
|
0.47561955 0.48131919 0.50014997 0.51044011]
|
|
|
|
mean value: 0.49487221240997314
|
|
|
|
key: score_time
|
|
value: [0.03812575 0.03874445 0.04082036 0.03824544 0.04031968 0.03890872
|
|
0.03962612 0.03432417 0.03413749 0.04158163]
|
|
|
|
mean value: 0.038483381271362305
|
|
|
|
key: test_mcc
|
|
value: [0.38227513 0.56349206 0.60000053 0.67328042 0.56042319 0.71588202
|
|
0.71049701 0.63745526 0.60000053 0.67284827]
|
|
|
|
mean value: 0.6116154413952977
|
|
|
|
key: train_mcc
|
|
value: [0.84324285 0.81246913 0.81734388 0.77677176 0.80436311 0.82014274
|
|
0.7750029 0.80352034 0.79584621 0.83468609]
|
|
|
|
mean value: 0.8083389021354033
|
|
|
|
key: test_accuracy
|
|
value: [0.69090909 0.78181818 0.8 0.83636364 0.76363636 0.85454545
|
|
0.85454545 0.81818182 0.8 0.83636364]
|
|
|
|
mean value: 0.8036363636363636
|
|
|
|
key: train_accuracy
|
|
value: [0.92121212 0.90505051 0.90707071 0.88686869 0.9010101 0.90909091
|
|
0.88686869 0.9010101 0.8969697 0.91717172]
|
|
|
|
mean value: 0.9032323232323233
|
|
|
|
key: test_fscore
|
|
value: [0.69090909 0.77777778 0.79245283 0.83636364 0.79365079 0.86666667
|
|
0.86206897 0.82758621 0.80701754 0.84210526]
|
|
|
|
mean value: 0.8096598774987982
|
|
|
|
key: train_fscore
|
|
value: /home/tanu/git/LSHTM_analysis/scripts/ml/./katg_cd_8020.py:176: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
rus_CT.sort_values(by = ['test_mcc'], ascending = False, inplace = True)
|
|
/home/tanu/git/LSHTM_analysis/scripts/ml/./katg_cd_8020.py:179: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
rus_BT.sort_values(by = ['bts_mcc'], ascending = False, inplace = True)
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[0.92307692 0.90873786 0.91119691 0.89189189 0.90485437 0.91193738
|
|
0.88976378 0.90373281 0.90019569 0.91816367]
|
|
|
|
mean value: 0.9063551293194985
|
|
|
|
key: test_precision
|
|
value: [0.67857143 0.77777778 0.80769231 0.82142857 0.69444444 0.8125
|
|
0.83333333 0.8 0.79310345 0.82758621]
|
|
|
|
mean value: 0.7846437518420277
|
|
|
|
key: train_precision
|
|
value: [0.9034749 0.87640449 0.87407407 0.85555556 0.87265918 0.88257576
|
|
0.86590038 0.8778626 0.87121212 0.90551181]
|
|
|
|
mean value: 0.8785230871889629
|
|
|
|
key: test_recall
|
|
value: [0.7037037 0.77777778 0.77777778 0.85185185 0.92592593 0.92857143
|
|
0.89285714 0.85714286 0.82142857 0.85714286]
|
|
|
|
mean value: 0.8394179894179894
|
|
|
|
key: train_recall
|
|
value: [0.94354839 0.94354839 0.9516129 0.93145161 0.93951613 0.94331984
|
|
0.91497976 0.93117409 0.93117409 0.93117409]
|
|
|
|
mean value: 0.9361499281703017
|
|
|
|
key: test_roc_auc
|
|
value: [0.69113757 0.78174603 0.79960317 0.83664021 0.76653439 0.8531746
|
|
0.85383598 0.81746032 0.79960317 0.83597884]
|
|
|
|
mean value: 0.8035714285714286
|
|
|
|
key: train_roc_auc
|
|
value: [0.92116691 0.90497257 0.90698054 0.88677844 0.90093215 0.90915992
|
|
0.88692536 0.90107092 0.89703866 0.91719995]
|
|
|
|
mean value: 0.9032225414653259
|
|
|
|
key: test_jcc
|
|
value: [0.52777778 0.63636364 0.65625 0.71875 0.65789474 0.76470588
|
|
0.75757576 0.70588235 0.67647059 0.72727273]
|
|
|
|
mean value: 0.6828943459361416
|
|
|
|
key: train_jcc
|
|
value: [0.85714286 0.83274021 0.83687943 0.80487805 0.82624113 0.8381295
|
|
0.80141844 0.82437276 0.81850534 0.84870849]
|
|
|
|
mean value: 0.8289016207961346
|
|
|
|
MCC on Blind test: 0.41
|
|
|
|
Accuracy on Blind test: 0.71
|
|
|
|
Model_name: Logistic Regression
|
|
Model func: LogisticRegression(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegression(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.10982728 0.08961582 0.1403172 0.09044671 0.1046319 0.09383154
|
|
0.09380674 0.08423138 0.09636784 0.09398198]
|
|
|
|
mean value: 0.09970583915710449
|
|
|
|
key: score_time
|
|
value: [0.01944542 0.0222249 0.02268076 0.02205539 0.02553725 0.02226377
|
|
0.02227473 0.02308798 0.02336025 0.04442167]
|
|
|
|
mean value: 0.024735212326049805
|
|
|
|
key: test_mcc
|
|
value: [0.60715823 0.55282303 0.60715823 0.70920814 0.47434165 0.50870557
|
|
0.6272288 0.8161102 0.8161102 0.64036711]
|
|
|
|
mean value: 0.6359211182224512
|
|
|
|
key: train_mcc
|
|
value: [0.70971122 0.75988917 0.72149066 0.71236887 0.72504884 0.72414357
|
|
0.73719782 0.70469162 0.6864422 0.7188745 ]
|
|
|
|
mean value: 0.7199858479275736
|
|
|
|
key: test_accuracy
|
|
value: [0.80263158 0.77631579 0.80263158 0.84210526 0.73684211 0.75
|
|
0.81333333 0.90666667 0.90666667 0.81333333]
|
|
|
|
mean value: 0.8150526315789474
|
|
|
|
key: train_accuracy
|
|
value: [0.85441176 0.87941176 0.86029412 0.85588235 0.86176471 0.86176471
|
|
0.86784141 0.85168869 0.84287812 0.85903084]
|
|
|
|
mean value: 0.8594968471970286
|
|
|
|
key: test_fscore
|
|
value: [0.81012658 0.77333333 0.81012658 0.86046512 0.74358974 0.77108434
|
|
0.80555556 0.90909091 0.90410959 0.83333333]
|
|
|
|
mean value: 0.82208150821294
|
|
|
|
key: train_fscore
|
|
value: [0.8579627 0.88252149 0.86370158 0.85878963 0.86609687 0.86455331
|
|
0.87215909 0.85633001 0.84604317 0.86206897]
|
|
|
|
mean value: 0.8630226807134473
|
|
|
|
key: test_precision
|
|
value: [0.7804878 0.78378378 0.7804878 0.77083333 0.725 0.71111111
|
|
0.82857143 0.875 0.94285714 0.76086957]
|
|
|
|
mean value: 0.7959001974630289
|
|
|
|
key: train_precision
|
|
value: [0.83753501 0.8603352 0.84313725 0.84180791 0.83977901 0.84745763
|
|
0.84573003 0.83149171 0.82816901 0.84269663]
|
|
|
|
mean value: 0.8418139390239697
|
|
|
|
key: test_recall
|
|
value: [0.84210526 0.76315789 0.84210526 0.97368421 0.76315789 0.84210526
|
|
0.78378378 0.94594595 0.86842105 0.92105263]
|
|
|
|
mean value: 0.854551920341394
|
|
|
|
key: train_recall
|
|
value: [0.87941176 0.90588235 0.88529412 0.87647059 0.89411765 0.88235294
|
|
0.90029326 0.88269795 0.86470588 0.88235294]
|
|
|
|
mean value: 0.8853579437640159
|
|
|
|
key: test_roc_auc
|
|
value: [0.80263158 0.77631579 0.80263158 0.84210526 0.73684211 0.75
|
|
0.81294452 0.9071835 0.9071835 0.81187767]
|
|
|
|
mean value: 0.8149715504978663
|
|
|
|
key: train_roc_auc
|
|
value: [0.85441176 0.87941176 0.86029412 0.85588235 0.86176471 0.86176471
|
|
0.86779369 0.85164309 0.84291013 0.85906503]
|
|
|
|
mean value: 0.8594941348973607
|
|
|
|
key: test_jcc
|
|
value: [0.68085106 0.63043478 0.68085106 0.75510204 0.59183673 0.62745098
|
|
0.6744186 0.83333333 0.825 0.71428571]
|
|
|
|
mean value: 0.7013564318440841
|
|
|
|
key: train_jcc
|
|
value: [0.75125628 0.78974359 0.76010101 0.75252525 0.7638191 0.76142132
|
|
0.77329975 0.74875622 0.73316708 0.75757576]
|
|
|
|
mean value: 0.7591665355937555
|
|
|
|
MCC on Blind test: 0.37
|
|
|
|
Accuracy on Blind test: 0.7
|
|
|
|
Model_name: Logistic RegressionCV
|
|
Model func: LogisticRegressionCV(random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegressionCV(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.98789167 2.2662549 2.27067804 2.25698662 2.20915818 2.31741405
|
|
2.09584761 1.99604988 1.960042 1.95785499]
|
|
|
|
mean value: 2.1318177938461305
|
|
|
|
key: score_time
|
|
value: [0.03459048 0.03004456 0.03427267 0.03353262 0.03086877 0.02023816
|
|
0.02075028 0.03499174 0.06430078 0.04073453]
|
|
|
|
mean value: 0.03443245887756348
|
|
|
|
key: test_mcc
|
|
value: [0.71275096 0.55436186 0.68516016 0.89597867 0.5797509 0.59222009
|
|
0.65338095 0.76721166 0.78662873 0.70676174]
|
|
|
|
mean value: 0.6934205740985823
|
|
|
|
key: train_mcc
|
|
value: [0.85919523 0.84154054 0.82727607 0.83600314 0.82495791 0.83983877
|
|
0.83108233 0.81813401 0.85618908 0.83647083]
|
|
|
|
mean value: 0.8370687923537343
|
|
|
|
key: test_accuracy
|
|
value: [0.85526316 0.77631579 0.84210526 0.94736842 0.78947368 0.78947368
|
|
0.82666667 0.88 0.89333333 0.85333333]
|
|
|
|
mean value: 0.8453333333333333
|
|
|
|
key: train_accuracy
|
|
value: [0.92941176 0.92058824 0.91323529 0.91764706 0.91176471 0.91911765
|
|
0.91483113 0.90895742 0.92804699 0.91776799]
|
|
|
|
mean value: 0.9181368230111427
|
|
|
|
key: test_fscore
|
|
value: [0.84931507 0.78481013 0.83783784 0.94871795 0.79487179 0.80952381
|
|
0.82191781 0.88607595 0.89473684 0.85714286]
|
|
|
|
mean value: 0.8484950042861208
|
|
|
|
key: train_fscore
|
|
value: [0.93043478 0.92173913 0.91510791 0.91930836 0.91428571 0.92154066
|
|
0.91737892 0.91014493 0.92846715 0.91954023]
|
|
|
|
mean value: 0.919794778263726
|
|
|
|
key: test_precision
|
|
value: [0.88571429 0.75609756 0.86111111 0.925 0.775 0.73913043
|
|
0.83333333 0.83333333 0.89473684 0.84615385]
|
|
|
|
mean value: 0.8349610747509392
|
|
|
|
key: train_precision
|
|
value: [0.91714286 0.90857143 0.89577465 0.90112994 0.88888889 0.89473684
|
|
0.89196676 0.89971347 0.92173913 0.8988764 ]
|
|
|
|
mean value: 0.9018540369079232
|
|
|
|
key: test_recall
|
|
value: [0.81578947 0.81578947 0.81578947 0.97368421 0.81578947 0.89473684
|
|
0.81081081 0.94594595 0.89473684 0.86842105]
|
|
|
|
mean value: 0.865149359886202
|
|
|
|
key: train_recall
|
|
value: [0.94411765 0.93529412 0.93529412 0.93823529 0.94117647 0.95
|
|
0.94428152 0.92082111 0.93529412 0.94117647]
|
|
|
|
mean value: 0.9385690874590306
|
|
|
|
key: test_roc_auc
|
|
value: [0.85526316 0.77631579 0.84210526 0.94736842 0.78947368 0.78947368
|
|
0.82645804 0.88086771 0.89331437 0.85312945]
|
|
|
|
mean value: 0.8453769559032718
|
|
|
|
key: train_roc_auc
|
|
value: [0.92941176 0.92058824 0.91323529 0.91764706 0.91176471 0.91911765
|
|
0.91478782 0.90893997 0.92805762 0.91780231]
|
|
|
|
mean value: 0.9181352423667414
|
|
|
|
key: test_jcc
|
|
value: [0.73809524 0.64583333 0.72093023 0.90243902 0.65957447 0.68
|
|
0.69767442 0.79545455 0.80952381 0.75 ]
|
|
|
|
mean value: 0.7399525070045068
|
|
|
|
key: train_jcc
|
|
value: [0.8699187 0.85483871 0.84350133 0.85066667 0.84210526 0.85449735
|
|
0.84736842 0.83510638 0.86648501 0.85106383]
|
|
|
|
mean value: 0.8515551666888841
|
|
|
|
MCC on Blind test: 0.45
|
|
|
|
Accuracy on Blind test: 0.73
|
|
|
|
Model_name: Gaussian NB
|
|
Model func: GaussianNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianNB())])
|
|
|
|
key: fit_time
|
|
value: [0.02018547 0.01305509 0.01273274 0.0127759 0.01259232 0.01216793
|
|
0.01206374 0.01258063 0.01179004 0.01284099]
|
|
|
|
mean value: 0.013278484344482422
|
|
|
|
key: score_time
|
|
value: [0.01289845 0.01067424 0.01081204 0.01007533 0.0105896 0.0096004
|
|
0.00983787 0.01012707 0.01009345 0.01081634]
|
|
|
|
mean value: 0.010552477836608887
|
|
|
|
key: test_mcc
|
|
value: [0.33282012 0.3981989 0.31755367 0.36842105 0.29451375 0.54554473
|
|
0.46657183 0.38691323 0.57325747 0.54694168]
|
|
|
|
mean value: 0.42307364397303315
|
|
|
|
key: train_mcc
|
|
value: [0.41643157 0.42629924 0.52124019 0.47713971 0.50644343 0.52124019
|
|
0.51842236 0.47213419 0.46955258 0.49879111]
|
|
|
|
mean value: 0.4827694578890911
|
|
|
|
key: test_accuracy
|
|
value: [0.65789474 0.69736842 0.65789474 0.68421053 0.64473684 0.76315789
|
|
0.73333333 0.69333333 0.78666667 0.77333333]
|
|
|
|
mean value: 0.7091929824561404
|
|
|
|
key: train_accuracy
|
|
value: [0.69264706 0.70147059 0.76029412 0.73823529 0.75294118 0.76029412
|
|
0.75917768 0.73568282 0.73421439 0.74889868]
|
|
|
|
mean value: 0.7383855921223115
|
|
|
|
key: test_fscore
|
|
value: [0.59375 0.71604938 0.675 0.68421053 0.6746988 0.79069767
|
|
0.72972973 0.69333333 0.78947368 0.77333333]
|
|
|
|
mean value: 0.7120276459238088
|
|
|
|
key: train_fscore
|
|
value: [0.62068966 0.74336283 0.7661406 0.74498567 0.75862069 0.7661406
|
|
0.76162791 0.74358974 0.74253201 0.75606277]
|
|
|
|
mean value: 0.7403752478934846
|
|
|
|
key: test_precision
|
|
value: [0.73076923 0.6744186 0.64285714 0.68421053 0.62222222 0.70833333
|
|
0.72972973 0.68421053 0.78947368 0.78378378]
|
|
|
|
mean value: 0.705000878418871
|
|
|
|
key: train_precision
|
|
value: [0.81042654 0.6518847 0.74789916 0.72625698 0.74157303 0.74789916
|
|
0.75504323 0.72299169 0.71900826 0.73407202]
|
|
|
|
mean value: 0.7357054781265242
|
|
|
|
key: test_recall
|
|
value: [0.5 0.76315789 0.71052632 0.68421053 0.73684211 0.89473684
|
|
0.72972973 0.7027027 0.78947368 0.76315789]
|
|
|
|
mean value: 0.7274537695590327
|
|
|
|
key: train_recall
|
|
value: [0.50294118 0.86470588 0.78529412 0.76470588 0.77647059 0.78529412
|
|
0.76832845 0.76539589 0.76764706 0.77941176]
|
|
|
|
mean value: 0.7560194928411247
|
|
|
|
key: test_roc_auc
|
|
value: [0.65789474 0.69736842 0.65789474 0.68421053 0.64473684 0.76315789
|
|
0.73328592 0.69345661 0.78662873 0.77347084]
|
|
|
|
mean value: 0.7092105263157894
|
|
|
|
key: train_roc_auc
|
|
value: [0.69264706 0.70147059 0.76029412 0.73823529 0.75294118 0.76029412
|
|
0.75916422 0.73563912 0.73426341 0.74894342]
|
|
|
|
mean value: 0.7383892530619286
|
|
|
|
key: test_jcc
|
|
value: [0.42222222 0.55769231 0.50943396 0.52 0.50909091 0.65384615
|
|
0.57446809 0.53061224 0.65217391 0.63043478]
|
|
|
|
mean value: 0.555997458077226
|
|
|
|
key: train_jcc
|
|
value: [0.45 0.5915493 0.62093023 0.59360731 0.61111111 0.62093023
|
|
0.61502347 0.59183673 0.59049774 0.60779817]
|
|
|
|
mean value: 0.5893284289504568
|
|
|
|
MCC on Blind test: 0.37
|
|
|
|
Accuracy on Blind test: 0.69
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01308656 0.01443648 0.01592875 0.0163331 0.01624155 0.01627684
|
|
0.01662326 0.01656532 0.01657724 0.01660323]
|
|
|
|
mean value: 0.015867233276367188
|
|
|
|
key: score_time
|
|
value: [0.01069379 0.0130043 0.01258349 0.01298976 0.01300502 0.01304913
|
|
0.01299644 0.01304674 0.01306272 0.01319098]
|
|
|
|
mean value: 0.012762236595153808
|
|
|
|
key: test_mcc
|
|
value: [0.47633051 0.36893239 0.39597276 0.53300179 0.34222378 0.2409658
|
|
0.3599438 0.44287262 0.46761578 0.36654993]
|
|
|
|
mean value: 0.39944091685365757
|
|
|
|
key: train_mcc
|
|
value: [0.53379791 0.51434288 0.55680795 0.55715863 0.53711859 0.49211605
|
|
0.49954902 0.49236773 0.52972516 0.48355201]
|
|
|
|
mean value: 0.5196535916488364
|
|
|
|
key: test_accuracy
|
|
value: [0.73684211 0.68421053 0.69736842 0.76315789 0.67105263 0.61842105
|
|
0.68 0.72 0.73333333 0.68 ]
|
|
|
|
mean value: 0.6984385964912281
|
|
|
|
key: train_accuracy
|
|
value: [0.76617647 0.75588235 0.77647059 0.77794118 0.76764706 0.74558824
|
|
0.74889868 0.74596182 0.76358297 0.74155653]
|
|
|
|
mean value: 0.7589705882352942
|
|
|
|
key: test_fscore
|
|
value: [0.75 0.67567568 0.70886076 0.7804878 0.67532468 0.65060241
|
|
0.66666667 0.69565217 0.72972973 0.71428571]
|
|
|
|
mean value: 0.7047285609605779
|
|
|
|
key: train_fscore
|
|
value: [0.77446809 0.767507 0.78888889 0.78520626 0.77683616 0.7532097
|
|
0.75949367 0.7517934 0.77419355 0.74639769]
|
|
|
|
mean value: 0.7677994408391525
|
|
|
|
key: test_precision
|
|
value: [0.71428571 0.69444444 0.68292683 0.72727273 0.66666667 0.6
|
|
0.68571429 0.75 0.75 0.65217391]
|
|
|
|
mean value: 0.6923484580695609
|
|
|
|
key: train_precision
|
|
value: [0.74794521 0.73262032 0.74736842 0.76033058 0.74728261 0.73130194
|
|
0.72972973 0.73595506 0.73994638 0.73163842]
|
|
|
|
mean value: 0.7404118658339571
|
|
|
|
key: test_recall
|
|
value: [0.78947368 0.65789474 0.73684211 0.84210526 0.68421053 0.71052632
|
|
0.64864865 0.64864865 0.71052632 0.78947368]
|
|
|
|
mean value: 0.7218349928876244
|
|
|
|
key: train_recall
|
|
value: [0.80294118 0.80588235 0.83529412 0.81176471 0.80882353 0.77647059
|
|
0.79178886 0.76832845 0.81176471 0.76176471]
|
|
|
|
mean value: 0.7974823184405727
|
|
|
|
key: test_roc_auc
|
|
value: [0.73684211 0.68421053 0.69736842 0.76315789 0.67105263 0.61842105
|
|
0.67958748 0.71906117 0.73364154 0.67852063]
|
|
|
|
mean value: 0.6981863442389759
|
|
|
|
key: train_roc_auc
|
|
value: [0.76617647 0.75588235 0.77647059 0.77794118 0.76764706 0.74558824
|
|
0.7488356 0.74592893 0.76365361 0.74158617]
|
|
|
|
mean value: 0.7589710194928411
|
|
|
|
key: test_jcc
|
|
value: [0.6 0.51020408 0.54901961 0.64 0.50980392 0.48214286
|
|
0.5 0.53333333 0.57446809 0.55555556]
|
|
|
|
mean value: 0.5454527442182547
|
|
|
|
key: train_jcc
|
|
value: [0.63194444 0.62272727 0.65137615 0.64637002 0.63510393 0.60411899
|
|
0.6122449 0.60229885 0.63157895 0.5954023 ]
|
|
|
|
mean value: 0.6233165801364813
|
|
|
|
MCC on Blind test: 0.39
|
|
|
|
Accuracy on Blind test: 0.7
|
|
|
|
Model_name: K-Nearest Neighbors
|
|
Model func: KNeighborsClassifier()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', KNeighborsClassifier())])
|
|
|
|
key: fit_time
|
|
value: [0.02674937 0.02765059 0.03468895 0.03487515 0.03414822 0.02609992
|
|
0.03097677 0.02569985 0.01612234 0.03866339]
|
|
|
|
mean value: 0.029567456245422362
|
|
|
|
key: score_time
|
|
value: [0.04836774 0.05574822 0.05374694 0.07118464 0.06189799 0.06573033
|
|
0.04920173 0.06797957 0.05703449 0.06322455]
|
|
|
|
mean value: 0.05941162109375
|
|
|
|
key: test_mcc
|
|
value: [0.47434165 0.44752341 0.50870557 0.40160966 0.45129209 0.37310125
|
|
0.33357041 0.3064868 0.44914911 0.3884542 ]
|
|
|
|
mean value: 0.41342341641703867
|
|
|
|
key: train_mcc
|
|
value: [0.63556908 0.62440506 0.62201307 0.64681178 0.67481782 0.62790893
|
|
0.6514998 0.62999224 0.62875514 0.63925201]
|
|
|
|
mean value: 0.6381024924335518
|
|
|
|
key: test_accuracy
|
|
value: [0.73684211 0.72368421 0.75 0.69736842 0.72368421 0.68421053
|
|
0.66666667 0.65333333 0.72 0.69333333]
|
|
|
|
mean value: 0.7049122807017544
|
|
|
|
key: train_accuracy
|
|
value: [0.81764706 0.81176471 0.81029412 0.82205882 0.83676471 0.81323529
|
|
0.82525698 0.81497797 0.81350954 0.81938326]
|
|
|
|
mean value: 0.8184892459186318
|
|
|
|
key: test_fscore
|
|
value: [0.74358974 0.72 0.77108434 0.72289157 0.74074074 0.70731707
|
|
0.66666667 0.63888889 0.69565217 0.68493151]
|
|
|
|
mean value: 0.7091762697433588
|
|
|
|
key: train_fscore
|
|
value: [0.82028986 0.81661891 0.81650071 0.82981716 0.84165478 0.81934566
|
|
0.83024251 0.81632653 0.81985816 0.82251082]
|
|
|
|
mean value: 0.8233165096604551
|
|
|
|
key: test_precision
|
|
value: [0.725 0.72972973 0.71111111 0.66666667 0.69767442 0.65909091
|
|
0.65789474 0.65714286 0.77419355 0.71428571]
|
|
|
|
mean value: 0.6992789691860841
|
|
|
|
key: train_precision
|
|
value: [0.80857143 0.79608939 0.79063361 0.79514825 0.81717452 0.79338843
|
|
0.80833333 0.8115942 0.79178082 0.80736544]
|
|
|
|
mean value: 0.8020079413070853
|
|
|
|
key: test_recall
|
|
value: [0.76315789 0.71052632 0.84210526 0.78947368 0.78947368 0.76315789
|
|
0.67567568 0.62162162 0.63157895 0.65789474]
|
|
|
|
mean value: 0.7244665718349929
|
|
|
|
key: train_recall
|
|
value: [0.83235294 0.83823529 0.84411765 0.86764706 0.86764706 0.84705882
|
|
0.85337243 0.82111437 0.85 0.83823529]
|
|
|
|
mean value: 0.845978092116612
|
|
|
|
key: test_roc_auc
|
|
value: [0.73684211 0.72368421 0.75 0.69736842 0.72368421 0.68421053
|
|
0.66678521 0.65291607 0.72119488 0.69381223]
|
|
|
|
mean value: 0.705049786628734
|
|
|
|
key: train_roc_auc
|
|
value: [0.81764706 0.81176471 0.81029412 0.82205882 0.83676471 0.81323529
|
|
0.82521563 0.81496895 0.81356305 0.8194109 ]
|
|
|
|
mean value: 0.8184923236156634
|
|
|
|
key: test_jcc
|
|
value: [0.59183673 0.5625 0.62745098 0.56603774 0.58823529 0.54716981
|
|
0.5 0.46938776 0.53333333 0.52083333]
|
|
|
|
mean value: 0.5506784978142201
|
|
|
|
key: train_jcc
|
|
value: [0.6953317 0.69007264 0.68990385 0.70913462 0.72660099 0.6939759
|
|
0.7097561 0.68965517 0.69471154 0.69852941]
|
|
|
|
mean value: 0.6997671905132484
|
|
|
|
MCC on Blind test: 0.17
|
|
|
|
Accuracy on Blind test: 0.6
|
|
|
|
Model_name: SVM
|
|
Model func: SVC(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SVC(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.0802989 0.05679274 0.05832553 0.05484366 0.05425358 0.05509114
|
|
0.05581689 0.0564189 0.05558991 0.05494356]
|
|
|
|
mean value: 0.05823748111724854
|
|
|
|
key: score_time
|
|
value: [0.02570128 0.02603674 0.02556634 0.0245285 0.02406287 0.02425098
|
|
0.02446532 0.02450919 0.02433729 0.0238986 ]
|
|
|
|
mean value: 0.02473571300506592
|
|
|
|
key: test_mcc
|
|
value: [0.52704628 0.63245553 0.55436186 0.6000992 0.50870557 0.35212384
|
|
0.62660028 0.65500602 0.70857794 0.60970498]
|
|
|
|
mean value: 0.577468149374876
|
|
|
|
key: train_mcc
|
|
value: [0.67008899 0.70627575 0.69625733 0.69530583 0.70810852 0.70113995
|
|
0.69024682 0.65354086 0.66717294 0.67852687]
|
|
|
|
mean value: 0.6866663861231256
|
|
|
|
key: test_accuracy
|
|
value: [0.76315789 0.81578947 0.77631579 0.78947368 0.75 0.67105263
|
|
0.81333333 0.82666667 0.85333333 0.8 ]
|
|
|
|
mean value: 0.7859122807017545
|
|
|
|
key: train_accuracy
|
|
value: [0.83382353 0.85147059 0.84558824 0.84558824 0.85147059 0.84705882
|
|
0.84287812 0.82525698 0.83113069 0.83700441]
|
|
|
|
mean value: 0.8411270190895741
|
|
|
|
key: test_fscore
|
|
value: [0.76923077 0.82051282 0.78481013 0.81395349 0.77108434 0.70588235
|
|
0.81081081 0.81690141 0.86075949 0.81927711]
|
|
|
|
mean value: 0.7973222716354671
|
|
|
|
key: train_fscore
|
|
value: [0.84062059 0.85834502 0.85436893 0.85355649 0.85991678 0.85714286
|
|
0.85159501 0.83356643 0.84049931 0.84561892]
|
|
|
|
mean value: 0.8495230332385642
|
|
|
|
key: test_precision
|
|
value: [0.75 0.8 0.75609756 0.72916667 0.71111111 0.63829787
|
|
0.81081081 0.85294118 0.82926829 0.75555556]
|
|
|
|
mean value: 0.7633249046613695
|
|
|
|
key: train_precision
|
|
value: [0.80758808 0.82037534 0.80839895 0.81167109 0.81364829 0.80412371
|
|
0.80789474 0.79679144 0.79527559 0.80211082]
|
|
|
|
mean value: 0.8067878043154759
|
|
|
|
key: test_recall
|
|
value: [0.78947368 0.84210526 0.81578947 0.92105263 0.84210526 0.78947368
|
|
0.81081081 0.78378378 0.89473684 0.89473684]
|
|
|
|
mean value: 0.8384068278805121
|
|
|
|
key: train_recall
|
|
value: [0.87647059 0.9 0.90588235 0.9 0.91176471 0.91764706
|
|
0.90029326 0.87390029 0.89117647 0.89411765]
|
|
|
|
mean value: 0.8971252371916508
|
|
|
|
key: test_roc_auc
|
|
value: [0.76315789 0.81578947 0.77631579 0.78947368 0.75 0.67105263
|
|
0.81330014 0.82610242 0.85277383 0.79871977]
|
|
|
|
mean value: 0.7856685633001422
|
|
|
|
key: train_roc_auc
|
|
value: [0.83382353 0.85147059 0.84558824 0.84558824 0.85147059 0.84705882
|
|
0.84279369 0.82518544 0.83121873 0.83708815]
|
|
|
|
mean value: 0.8411286010005175
|
|
|
|
key: test_jcc
|
|
value: [0.625 0.69565217 0.64583333 0.68627451 0.62745098 0.54545455
|
|
0.68181818 0.69047619 0.75555556 0.69387755]
|
|
|
|
mean value: 0.6647393021767336
|
|
|
|
key: train_jcc
|
|
value: [0.72506083 0.75184275 0.74576271 0.74452555 0.75425791 0.75
|
|
0.74154589 0.7146283 0.72488038 0.73253012]
|
|
|
|
mean value: 0.7385034440284566
|
|
|
|
MCC on Blind test: 0.33
|
|
|
|
Accuracy on Blind test: 0.67
|
|
|
|
Model_name: MLP
|
|
Model func: MLPClassifier(max_iter=500, random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MLPClassifier(max_iter=500, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [3.125808 3.88597608 3.33935761 3.23725915 3.05539298 5.10852599
|
|
3.69062066 6.42505813 3.13469172 4.00817013]
|
|
|
|
mean value: 3.9010860443115236
|
|
|
|
key: score_time
|
|
value: [0.02064943 0.02351236 0.02030373 0.0210588 0.01895785 0.01998782
|
|
0.02116323 0.02044129 0.02919769 0.02007508]
|
|
|
|
mean value: 0.02153472900390625
|
|
|
|
key: test_mcc
|
|
value: [0.61057165 0.55436186 0.66934944 0.77644535 0.60715823 0.53300179
|
|
0.57437737 0.73786392 0.65362731 0.57325747]
|
|
|
|
mean value: 0.6290014395651344
|
|
|
|
key: train_mcc
|
|
value: [0.88333157 0.89911842 0.89752867 0.90006229 0.84573237 0.93529412
|
|
0.88643357 0.96182475 0.85464478 0.87678467]
|
|
|
|
mean value: 0.894075520512985
|
|
|
|
key: test_accuracy
|
|
value: [0.80263158 0.77631579 0.82894737 0.88157895 0.80263158 0.76315789
|
|
0.78666667 0.86666667 0.82666667 0.78666667]
|
|
|
|
mean value: 0.8121929824561404
|
|
|
|
key: train_accuracy
|
|
value: [0.94117647 0.94852941 0.94852941 0.95 0.92205882 0.96764706
|
|
0.94126285 0.98091043 0.92657856 0.93832599]
|
|
|
|
mean value: 0.9465019003195991
|
|
|
|
key: test_fscore
|
|
value: [0.81481481 0.78481013 0.84337349 0.89156627 0.81012658 0.7804878
|
|
0.78947368 0.87179487 0.82666667 0.78947368]
|
|
|
|
mean value: 0.8202587994472359
|
|
|
|
key: train_fscore
|
|
value: [0.94252874 0.95021337 0.94934877 0.9502924 0.92439372 0.96764706
|
|
0.94397759 0.98096633 0.92857143 0.93768546]
|
|
|
|
mean value: 0.9475624861118828
|
|
|
|
key: test_precision
|
|
value: [0.76744186 0.75609756 0.77777778 0.82222222 0.7804878 0.72727273
|
|
0.76923077 0.82926829 0.83783784 0.78947368]
|
|
|
|
mean value: 0.7857110537553562
|
|
|
|
key: train_precision
|
|
value: [0.92134831 0.92011019 0.93447293 0.94476744 0.89750693 0.96764706
|
|
0.90348525 0.97953216 0.90277778 0.94610778]
|
|
|
|
mean value: 0.9317755848452187
|
|
|
|
key: test_recall
|
|
value: [0.86842105 0.81578947 0.92105263 0.97368421 0.84210526 0.84210526
|
|
0.81081081 0.91891892 0.81578947 0.78947368]
|
|
|
|
mean value: 0.8598150782361309
|
|
|
|
key: train_recall
|
|
value: [0.96470588 0.98235294 0.96470588 0.95588235 0.95294118 0.96764706
|
|
0.98826979 0.98240469 0.95588235 0.92941176]
|
|
|
|
mean value: 0.9644203898568225
|
|
|
|
key: test_roc_auc
|
|
value: [0.80263158 0.77631579 0.82894737 0.88157895 0.80263158 0.76315789
|
|
0.78698435 0.8673542 0.82681366 0.78662873]
|
|
|
|
mean value: 0.8123044096728308
|
|
|
|
key: train_roc_auc
|
|
value: [0.94117647 0.94852941 0.94852941 0.95 0.92205882 0.96764706
|
|
0.94119372 0.98090823 0.92662153 0.93831292]
|
|
|
|
mean value: 0.9464977574607556
|
|
|
|
key: test_jcc
|
|
value: [0.6875 0.64583333 0.72916667 0.80434783 0.68085106 0.64
|
|
0.65217391 0.77272727 0.70454545 0.65217391]
|
|
|
|
mean value: 0.6969319443276427
|
|
|
|
key: train_jcc
|
|
value: [0.89130435 0.90514905 0.90358127 0.90529248 0.85941645 0.93732194
|
|
0.8938992 0.96264368 0.86666667 0.88268156]
|
|
|
|
mean value: 0.9007956641905576
|
|
|
|
MCC on Blind test: 0.35
|
|
|
|
Accuracy on Blind test: 0.69
|
|
|
|
Model_name: Decision Tree
|
|
Model func: DecisionTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', DecisionTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.06624913 0.0556736 0.05277705 0.05857944 0.05848026 0.07034683
|
|
0.04920125 0.04909158 0.04846334 0.05035853]
|
|
|
|
mean value: 0.05592210292816162
|
|
|
|
key: score_time
|
|
value: [0.01314282 0.01308179 0.0130949 0.01314974 0.01289868 0.02340746
|
|
0.01266885 0.0129981 0.01308608 0.01307988]
|
|
|
|
mean value: 0.01406083106994629
|
|
|
|
key: test_mcc
|
|
value: [0.8183437 0.73786479 0.92137172 0.79388419 0.84210526 0.79056942
|
|
0.77409621 0.89857968 0.85123569 0.71247852]
|
|
|
|
mean value: 0.8140529175886273
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.90789474 0.86842105 0.96052632 0.89473684 0.92105263 0.89473684
|
|
0.88 0.94666667 0.92 0.85333333]
|
|
|
|
mean value: 0.9047368421052632
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.90410959 0.87179487 0.96103896 0.9 0.92105263 0.8974359
|
|
0.88888889 0.94871795 0.91428571 0.86419753]
|
|
|
|
mean value: 0.9071522033646523
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.94285714 0.85 0.94871795 0.85714286 0.92105263 0.875
|
|
0.81818182 0.90243902 1. 0.81395349]
|
|
|
|
mean value: 0.8929344911241051
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.86842105 0.89473684 0.97368421 0.94736842 0.92105263 0.92105263
|
|
0.97297297 1. 0.84210526 0.92105263]
|
|
|
|
mean value: 0.92624466571835
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.90789474 0.86842105 0.96052632 0.89473684 0.92105263 0.89473684
|
|
0.88122333 0.94736842 0.92105263 0.85241821]
|
|
|
|
mean value: 0.9049431009957326
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.825 0.77272727 0.925 0.81818182 0.85365854 0.81395349
|
|
0.8 0.90243902 0.84210526 0.76086957]
|
|
|
|
mean value: 0.831393496863208
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.51
|
|
|
|
Accuracy on Blind test: 0.76
|
|
|
|
Model_name: Extra Trees
|
|
Model func: ExtraTreesClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreesClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.2343452 0.22954488 0.23199201 0.22714138 0.22376323 0.22496653
|
|
0.28836441 0.22837901 0.33268142 0.22716761]
|
|
|
|
mean value: 0.244834566116333
|
|
|
|
key: score_time
|
|
value: [0.02604103 0.02597666 0.02631664 0.0255394 0.02552819 0.02560306
|
|
0.0257988 0.02566075 0.02647042 0.02588677]
|
|
|
|
mean value: 0.02588217258453369
|
|
|
|
key: test_mcc
|
|
value: [0.63245553 0.46737879 0.60547285 0.66934944 0.60547285 0.57894737
|
|
0.62660028 0.62660028 0.81365576 0.68339862]
|
|
|
|
mean value: 0.630933177423598
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.81578947 0.72368421 0.80263158 0.82894737 0.80263158 0.78947368
|
|
0.81333333 0.81333333 0.90666667 0.84 ]
|
|
|
|
mean value: 0.8136491228070175
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.82051282 0.67692308 0.80519481 0.84337349 0.8 0.78947368
|
|
0.81081081 0.81081081 0.90666667 0.85 ]
|
|
|
|
mean value: 0.8113766169105421
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.8 0.81481481 0.79487179 0.77777778 0.81081081 0.78947368
|
|
0.81081081 0.81081081 0.91891892 0.80952381]
|
|
|
|
mean value: 0.8137813232550075
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.84210526 0.57894737 0.81578947 0.92105263 0.78947368 0.78947368
|
|
0.81081081 0.81081081 0.89473684 0.89473684]
|
|
|
|
mean value: 0.8147937411095306
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.81578947 0.72368421 0.80263158 0.82894737 0.80263158 0.78947368
|
|
0.81330014 0.81330014 0.90682788 0.83926031]
|
|
|
|
mean value: 0.8135846372688478
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.69565217 0.51162791 0.67391304 0.72916667 0.66666667 0.65217391
|
|
0.68181818 0.68181818 0.82926829 0.73913043]
|
|
|
|
mean value: 0.6861235461846759
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.38
|
|
|
|
Accuracy on Blind test: 0.7
|
|
|
|
Model_name: Extra Tree
|
|
Model func: ExtraTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01622486 0.01615024 0.01622295 0.01626086 0.0163157 0.01620007
|
|
0.01640821 0.01638412 0.01648927 0.01617265]
|
|
|
|
mean value: 0.01628289222717285
|
|
|
|
key: score_time
|
|
value: [0.01252675 0.01257443 0.01253557 0.01264286 0.01262879 0.01260853
|
|
0.01256275 0.01263571 0.01256704 0.01246357]
|
|
|
|
mean value: 0.012574601173400878
|
|
|
|
key: test_mcc
|
|
value: [0.21170245 0.35212384 0.36893239 0.5383819 0.34510572 0.48454371
|
|
0.4953682 0.48593799 0.51991465 0.36654993]
|
|
|
|
mean value: 0.4168560787984107
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.60526316 0.67105263 0.68421053 0.76315789 0.67105263 0.73684211
|
|
0.74666667 0.73333333 0.76 0.68 ]
|
|
|
|
mean value: 0.7051578947368421
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.625 0.62686567 0.69230769 0.78571429 0.69135802 0.76190476
|
|
0.75324675 0.76190476 0.76315789 0.71428571]
|
|
|
|
mean value: 0.717574556043396
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.5952381 0.72413793 0.675 0.7173913 0.65116279 0.69565217
|
|
0.725 0.68085106 0.76315789 0.65217391]
|
|
|
|
mean value: 0.687976516684123
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.65789474 0.55263158 0.71052632 0.86842105 0.73684211 0.84210526
|
|
0.78378378 0.86486486 0.76315789 0.78947368]
|
|
|
|
mean value: 0.7569701280227596
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.60526316 0.67105263 0.68421053 0.76315789 0.67105263 0.73684211
|
|
0.74715505 0.73506401 0.75995733 0.67852063]
|
|
|
|
mean value: 0.7052275960170697
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.45454545 0.45652174 0.52941176 0.64705882 0.52830189 0.61538462
|
|
0.60416667 0.61538462 0.61702128 0.55555556]
|
|
|
|
mean value: 0.5623352398290834
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.17
|
|
|
|
Accuracy on Blind test: 0.59
|
|
|
|
Model_name: Random Forest
|
|
Model func: RandomForestClassifier(n_estimators=1000, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(n_estimators=1000, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [3.59016013 2.43340111 2.37794232 2.3539238 2.3487556 2.34586096
|
|
2.35762548 2.30417991 2.32716799 2.32630777]
|
|
|
|
mean value: 2.476532506942749
|
|
|
|
key: score_time
|
|
value: [0.09689355 0.09765291 0.09774446 0.1016047 0.09715796 0.09771132
|
|
0.09913492 0.09694338 0.09686327 0.09760833]
|
|
|
|
mean value: 0.09793148040771485
|
|
|
|
key: test_mcc
|
|
value: [0.73786479 0.84327404 0.8183437 0.79388419 0.8183437 0.79056942
|
|
0.86956721 0.84500776 0.89466215 0.7341428 ]
|
|
|
|
mean value: 0.8145659749383379
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.86842105 0.92105263 0.90789474 0.89473684 0.90789474 0.89473684
|
|
0.93333333 0.92 0.94666667 0.86666667]
|
|
|
|
mean value: 0.906140350877193
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.87179487 0.91891892 0.91139241 0.9 0.91139241 0.89189189
|
|
0.93506494 0.92307692 0.94594595 0.87179487]
|
|
|
|
mean value: 0.9081273168614941
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.85 0.94444444 0.87804878 0.85714286 0.87804878 0.91666667
|
|
0.9 0.87804878 0.97222222 0.85 ]
|
|
|
|
mean value: 0.8924622531939606
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.89473684 0.89473684 0.94736842 0.94736842 0.94736842 0.86842105
|
|
0.97297297 0.97297297 0.92105263 0.89473684]
|
|
|
|
mean value: 0.9261735419630156
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.86842105 0.92105263 0.90789474 0.89473684 0.90789474 0.89473684
|
|
0.93385491 0.92069701 0.9470128 0.86628734]
|
|
|
|
mean value: 0.9062588904694168
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.77272727 0.85 0.8372093 0.81818182 0.8372093 0.80487805
|
|
0.87804878 0.85714286 0.8974359 0.77272727]
|
|
|
|
mean value: 0.8325560552134574
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.59
|
|
|
|
Accuracy on Blind test: 0.8
|
|
|
|
Model_name: Random Forest2
|
|
Model func: RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000...05', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10,
|
|
oob_score=True, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.99721408 1.01294684 1.02325273 1.04895473 1.04307055 1.8648448
|
|
1.1672833 1.08559823 1.08962488 1.03646708]
|
|
|
|
mean value: 1.136925721168518
|
|
|
|
key: score_time
|
|
value: [0.24457765 0.24101067 0.20914412 0.16088748 0.32551408 0.12056327
|
|
0.14562988 0.17096281 0.19131255 0.20360041]
|
|
|
|
mean value: 0.20132029056549072
|
|
|
|
key: test_mcc
|
|
value: [0.76342228 0.84327404 0.8468098 0.79388419 0.79388419 0.79056942
|
|
0.84128135 0.82093797 0.82093797 0.73712493]
|
|
|
|
mean value: 0.8052126129506643
|
|
|
|
key: train_mcc
|
|
value: [0.93020068 0.92714855 0.92695584 0.92999118 0.9212619 0.93020068
|
|
0.93277543 0.9153047 0.9272585 0.92442777]
|
|
|
|
mean value: 0.9265525229124193
|
|
|
|
key: test_accuracy
|
|
value: [0.88157895 0.92105263 0.92105263 0.89473684 0.89473684 0.89473684
|
|
0.92 0.90666667 0.90666667 0.86666667]
|
|
|
|
mean value: 0.9007894736842105
|
|
|
|
key: train_accuracy
|
|
value: [0.96470588 0.96323529 0.96323529 0.96470588 0.96029412 0.96470588
|
|
0.96622614 0.95741557 0.96328928 0.96182085]
|
|
|
|
mean value: 0.9629634188477153
|
|
|
|
key: test_fscore
|
|
value: [0.88311688 0.91891892 0.925 0.9 0.9 0.8974359
|
|
0.92105263 0.91139241 0.90140845 0.875 ]
|
|
|
|
mean value: 0.9033325186818164
|
|
|
|
key: train_fscore
|
|
value: [0.96541787 0.96392496 0.96382055 0.96531792 0.96103896 0.96541787
|
|
0.96671491 0.95815296 0.96392496 0.96253602]
|
|
|
|
mean value: 0.9636266979903134
|
|
|
|
key: test_precision
|
|
value: [0.87179487 0.94444444 0.88095238 0.85714286 0.85714286 0.875
|
|
0.8974359 0.85714286 0.96969697 0.83333333]
|
|
|
|
mean value: 0.884408646908647
|
|
|
|
key: train_precision
|
|
value: [0.94632768 0.94617564 0.94871795 0.94886364 0.94334278 0.94632768
|
|
0.95428571 0.94318182 0.94617564 0.94350282]
|
|
|
|
mean value: 0.9466901360631015
|
|
|
|
key: test_recall
|
|
value: [0.89473684 0.89473684 0.97368421 0.94736842 0.94736842 0.92105263
|
|
0.94594595 0.97297297 0.84210526 0.92105263]
|
|
|
|
mean value: 0.9261024182076814
|
|
|
|
key: train_recall
|
|
value: [0.98529412 0.98235294 0.97941176 0.98235294 0.97941176 0.98529412
|
|
0.97947214 0.97360704 0.98235294 0.98235294]
|
|
|
|
mean value: 0.9811902708297395
|
|
|
|
key: test_roc_auc
|
|
value: [0.88157895 0.92105263 0.92105263 0.89473684 0.89473684 0.89473684
|
|
0.92034139 0.90753912 0.90753912 0.86593172]
|
|
|
|
mean value: 0.9009246088193457
|
|
|
|
key: train_roc_auc
|
|
value: [0.96470588 0.96323529 0.96323529 0.96470588 0.96029412 0.96470588
|
|
0.96620666 0.95739175 0.96331723 0.96185096]
|
|
|
|
mean value: 0.9629648956356737
|
|
|
|
key: test_jcc
|
|
value: [0.79069767 0.85 0.86046512 0.81818182 0.81818182 0.81395349
|
|
0.85365854 0.8372093 0.82051282 0.77777778]
|
|
|
|
mean value: 0.8240638352634949
|
|
|
|
key: train_jcc
|
|
value: [0.93314763 0.93036212 0.9301676 0.93296089 0.925 0.93314763
|
|
0.93557423 0.91966759 0.93036212 0.92777778]
|
|
|
|
mean value: 0.9298167587724709
|
|
|
|
MCC on Blind test: 0.62
|
|
|
|
Accuracy on Blind test: 0.82
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01270723 0.01266479 0.01307917 0.01256824 0.01252437 0.01249313
|
|
0.01269579 0.01269126 0.01251435 0.01329732]
|
|
|
|
mean value: 0.012723565101623535
|
|
|
|
key: score_time
|
|
value: [0.01030993 0.01030421 0.00950694 0.01019931 0.01010823 0.01005173
|
|
0.01017547 0.01013899 0.0100975 0.01014495]
|
|
|
|
mean value: 0.010103726387023925
|
|
|
|
key: test_mcc
|
|
value: [0.47633051 0.36893239 0.39597276 0.53300179 0.34222378 0.2409658
|
|
0.3599438 0.44287262 0.46761578 0.36654993]
|
|
|
|
mean value: 0.39944091685365757
|
|
|
|
key: train_mcc
|
|
value: [0.53379791 0.51434288 0.55680795 0.55715863 0.53711859 0.49211605
|
|
0.49954902 0.49236773 0.52972516 0.48355201]
|
|
|
|
mean value: 0.5196535916488364
|
|
|
|
key: test_accuracy
|
|
value: [0.73684211 0.68421053 0.69736842 0.76315789 0.67105263 0.61842105
|
|
0.68 0.72 0.73333333 0.68 ]
|
|
|
|
mean value: 0.6984385964912281
|
|
|
|
key: train_accuracy
|
|
value: [0.76617647 0.75588235 0.77647059 0.77794118 0.76764706 0.74558824
|
|
0.74889868 0.74596182 0.76358297 0.74155653]
|
|
|
|
mean value: 0.7589705882352942
|
|
|
|
key: test_fscore
|
|
value: [0.75 0.67567568 0.70886076 0.7804878 0.67532468 0.65060241
|
|
0.66666667 0.69565217 0.72972973 0.71428571]
|
|
|
|
mean value: 0.7047285609605779
|
|
|
|
key: train_fscore
|
|
value: [0.77446809 0.767507 0.78888889 0.78520626 0.77683616 0.7532097
|
|
0.75949367 0.7517934 0.77419355 0.74639769]
|
|
|
|
mean value: 0.7677994408391525
|
|
|
|
key: test_precision
|
|
value: [0.71428571 0.69444444 0.68292683 0.72727273 0.66666667 0.6
|
|
0.68571429 0.75 0.75 0.65217391]
|
|
|
|
mean value: 0.6923484580695609
|
|
|
|
key: train_precision
|
|
value: [0.74794521 0.73262032 0.74736842 0.76033058 0.74728261 0.73130194
|
|
0.72972973 0.73595506 0.73994638 0.73163842]
|
|
|
|
mean value: 0.7404118658339571
|
|
|
|
key: test_recall
|
|
value: [0.78947368 0.65789474 0.73684211 0.84210526 0.68421053 0.71052632
|
|
0.64864865 0.64864865 0.71052632 0.78947368]
|
|
|
|
mean value: 0.7218349928876244
|
|
|
|
key: train_recall
|
|
value: [0.80294118 0.80588235 0.83529412 0.81176471 0.80882353 0.77647059
|
|
0.79178886 0.76832845 0.81176471 0.76176471]
|
|
|
|
mean value: 0.7974823184405727
|
|
|
|
key: test_roc_auc
|
|
value: [0.73684211 0.68421053 0.69736842 0.76315789 0.67105263 0.61842105
|
|
0.67958748 0.71906117 0.73364154 0.67852063]
|
|
|
|
mean value: 0.6981863442389759
|
|
|
|
key: train_roc_auc
|
|
value: [0.76617647 0.75588235 0.77647059 0.77794118 0.76764706 0.74558824
|
|
0.7488356 0.74592893 0.76365361 0.74158617]
|
|
|
|
mean value: 0.7589710194928411
|
|
|
|
key: test_jcc
|
|
value: [0.6 0.51020408 0.54901961 0.64 0.50980392 0.48214286
|
|
0.5 0.53333333 0.57446809 0.55555556]
|
|
|
|
mean value: 0.5454527442182547
|
|
|
|
key: train_jcc
|
|
value: [0.63194444 0.62272727 0.65137615 0.64637002 0.63510393 0.60411899
|
|
0.6122449 0.60229885 0.63157895 0.5954023 ]
|
|
|
|
mean value: 0.6233165801364813
|
|
|
|
MCC on Blind test: 0.39
|
|
|
|
Accuracy on Blind test: 0.7
|
|
|
|
Model_name: XGBoost
|
|
Model func: XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000...
|
|
interaction_constraints=None, learning_rate=None,
|
|
max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan,
|
|
monotone_constraints=None, n_estimators=100,
|
|
n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None,
|
|
reg_lambda=None, scale_pos_weight=None,
|
|
subsample=None, tree_method=None,
|
|
use_label_encoder=False,
|
|
validate_parameters=None, verbosity=0))])
|
|
|
|
key: fit_time
|
|
value: [1.10055232 1.55583549 0.54820633 1.45461535 1.24498558 0.32061243
|
|
0.19951272 1.11374784 1.64657617 1.46606708]
|
|
|
|
mean value: 1.065071129798889
|
|
|
|
key: score_time
|
|
value: [0.01419473 0.01374435 0.01233912 0.01229787 0.01339912 0.01267767
|
|
0.01357245 0.01387644 0.0130434 0.01331758]
|
|
|
|
mean value: 0.013246273994445801
|
|
|
|
key: test_mcc
|
|
value: [0.78947368 0.78947368 0.86872191 0.84327404 0.87114007 0.8160721
|
|
0.89466215 0.85123569 0.86956721 0.7341428 ]
|
|
|
|
mean value: 0.8327763333573044
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.89473684 0.89473684 0.93421053 0.92105263 0.93421053 0.90789474
|
|
0.94666667 0.92 0.93333333 0.86666667]
|
|
|
|
mean value: 0.9153508771929825
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.89473684 0.89473684 0.93506494 0.92307692 0.93670886 0.90909091
|
|
0.94736842 0.925 0.93150685 0.87179487]
|
|
|
|
mean value: 0.9169085454365359
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.89473684 0.89473684 0.92307692 0.9 0.90243902 0.8974359
|
|
0.92307692 0.86046512 0.97142857 0.85 ]
|
|
|
|
mean value: 0.9017396139898155
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.89473684 0.89473684 0.94736842 0.94736842 0.97368421 0.92105263
|
|
0.97297297 1. 0.89473684 0.89473684]
|
|
|
|
mean value: 0.9341394025604552
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.89473684 0.89473684 0.93421053 0.92105263 0.93421053 0.90789474
|
|
0.9470128 0.92105263 0.93385491 0.86628734]
|
|
|
|
mean value: 0.9155049786628734
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.80952381 0.80952381 0.87804878 0.85714286 0.88095238 0.83333333
|
|
0.9 0.86046512 0.87179487 0.77272727]
|
|
|
|
mean value: 0.847351223176521
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.66
|
|
|
|
Accuracy on Blind test: 0.84
|
|
|
|
Model_name: LDA
|
|
Model func: LinearDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LinearDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.14110637 0.11275864 0.09858799 0.15298676 0.11577153 0.12007999
|
|
0.08206964 0.2270782 0.08553147 0.13036823]
|
|
|
|
mean value: 0.126633882522583
|
|
|
|
key: score_time
|
|
value: [0.01302385 0.01298141 0.02917647 0.01925826 0.03711748 0.05580354
|
|
0.03337526 0.01361036 0.01770854 0.05333591]
|
|
|
|
mean value: 0.028539109230041503
|
|
|
|
key: test_mcc
|
|
value: [0.51464253 0.5797509 0.65812266 0.7228974 0.53300179 0.63960215
|
|
0.5737718 0.82825406 0.76031294 0.55526703]
|
|
|
|
mean value: 0.6365623268397554
|
|
|
|
key: train_mcc
|
|
value: [0.79888883 0.82525884 0.78709711 0.80903788 0.79765839 0.81138597
|
|
0.81404632 0.79577596 0.7925366 0.82243288]
|
|
|
|
mean value: 0.8054118770792479
|
|
|
|
key: test_accuracy
|
|
value: [0.75 0.78947368 0.82894737 0.85526316 0.76315789 0.81578947
|
|
0.78666667 0.90666667 0.88 0.77333333]
|
|
|
|
mean value: 0.8149298245614035
|
|
|
|
key: train_accuracy
|
|
value: [0.89852941 0.91176471 0.89264706 0.90294118 0.89705882 0.90441176
|
|
0.90602056 0.89720999 0.89427313 0.91042584]
|
|
|
|
mean value: 0.9015282456594973
|
|
|
|
key: test_fscore
|
|
value: [0.77647059 0.79487179 0.83116883 0.86746988 0.7804878 0.82926829
|
|
0.77777778 0.91358025 0.88 0.79518072]
|
|
|
|
mean value: 0.8246275938937893
|
|
|
|
key: train_fscore
|
|
value: [0.90184922 0.91452991 0.89615932 0.90704225 0.90168539 0.90806223
|
|
0.90934844 0.9002849 0.89915966 0.91298146]
|
|
|
|
mean value: 0.9051102792096003
|
|
|
|
key: test_precision
|
|
value: [0.70212766 0.775 0.82051282 0.8 0.72727273 0.77272727
|
|
0.8 0.84090909 0.89189189 0.73333333]
|
|
|
|
mean value: 0.7863774796221605
|
|
|
|
key: train_precision
|
|
value: [0.87327824 0.88674033 0.8677686 0.87027027 0.86290323 0.8746594
|
|
0.87945205 0.87534626 0.85828877 0.88642659]
|
|
|
|
mean value: 0.8735133738102909
|
|
|
|
key: test_recall
|
|
value: [0.86842105 0.81578947 0.84210526 0.94736842 0.84210526 0.89473684
|
|
0.75675676 1. 0.86842105 0.86842105]
|
|
|
|
mean value: 0.8704125177809389
|
|
|
|
key: train_recall
|
|
value: [0.93235294 0.94411765 0.92647059 0.94705882 0.94411765 0.94411765
|
|
0.94134897 0.92668622 0.94411765 0.94117647]
|
|
|
|
mean value: 0.9391564602380542
|
|
|
|
key: test_roc_auc
|
|
value: [0.75 0.78947368 0.82894737 0.85526316 0.76315789 0.81578947
|
|
0.78627312 0.90789474 0.88015647 0.77204836]
|
|
|
|
mean value: 0.814900426742532
|
|
|
|
key: train_roc_auc
|
|
value: [0.89852941 0.91176471 0.89264706 0.90294118 0.89705882 0.90441176
|
|
0.9059686 0.89716664 0.89434621 0.91047093]
|
|
|
|
mean value: 0.9015305330343281
|
|
|
|
key: test_jcc
|
|
value: [0.63461538 0.65957447 0.71111111 0.76595745 0.64 0.70833333
|
|
0.63636364 0.84090909 0.78571429 0.66 ]
|
|
|
|
mean value: 0.7042578756940459
|
|
|
|
key: train_jcc
|
|
value: [0.82124352 0.84251969 0.81185567 0.82989691 0.82097187 0.83160622
|
|
0.83376623 0.81865285 0.81679389 0.83989501]
|
|
|
|
mean value: 0.826720186005957
|
|
|
|
MCC on Blind test: 0.34
|
|
|
|
Accuracy on Blind test: 0.68
|
|
|
|
Model_name: Multinomial
|
|
Model func: MultinomialNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MultinomialNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01595211 0.01399875 0.01458335 0.01303601 0.01311922 0.01321697
|
|
0.01278615 0.01339293 0.01290536 0.01298928]
|
|
|
|
mean value: 0.013598012924194335
|
|
|
|
key: score_time
|
|
value: [0.01220679 0.01145053 0.01085377 0.01045156 0.01065111 0.0107286
|
|
0.01061702 0.0105207 0.01065397 0.01054144]
|
|
|
|
mean value: 0.010867547988891602
|
|
|
|
key: test_mcc
|
|
value: [0.58218174 0.55282303 0.4234049 0.52236453 0.34510572 0.50952467
|
|
0.65362731 0.44287262 0.6002845 0.48226509]
|
|
|
|
mean value: 0.5114454102851022
|
|
|
|
key: train_mcc
|
|
value: [0.55606702 0.53073218 0.5694948 0.55720889 0.5536544 0.56273143
|
|
0.53963322 0.52368901 0.53387817 0.53742626]
|
|
|
|
mean value: 0.5464515370857125
|
|
|
|
key: test_accuracy
|
|
value: [0.78947368 0.77631579 0.71052632 0.75 0.67105263 0.73684211
|
|
0.82666667 0.72 0.8 0.73333333]
|
|
|
|
mean value: 0.751421052631579
|
|
|
|
key: train_accuracy
|
|
value: [0.77647059 0.76323529 0.78235294 0.77647059 0.775 0.77941176
|
|
0.76798825 0.76064611 0.7650514 0.76651982]
|
|
|
|
mean value: 0.7713146756499957
|
|
|
|
key: test_fscore
|
|
value: [0.8 0.77333333 0.725 0.7816092 0.69135802 0.77777778
|
|
0.82666667 0.69565217 0.8 0.76744186]
|
|
|
|
mean value: 0.7638839032249595
|
|
|
|
key: train_fscore
|
|
value: [0.7877095 0.77731674 0.79558011 0.78947368 0.78720445 0.79166667
|
|
0.78116343 0.77202797 0.77777778 0.78008299]
|
|
|
|
mean value: 0.7840003317290627
|
|
|
|
key: test_precision
|
|
value: [0.76190476 0.78378378 0.69047619 0.69387755 0.65116279 0.67307692
|
|
0.81578947 0.75 0.81081081 0.6875 ]
|
|
|
|
mean value: 0.7318382285454763
|
|
|
|
key: train_precision
|
|
value: [0.75 0.73368146 0.75 0.7460733 0.74670185 0.75
|
|
0.74015748 0.73796791 0.73684211 0.73629243]
|
|
|
|
mean value: 0.7427716535751065
|
|
|
|
key: test_recall
|
|
value: [0.84210526 0.76315789 0.76315789 0.89473684 0.73684211 0.92105263
|
|
0.83783784 0.64864865 0.78947368 0.86842105]
|
|
|
|
mean value: 0.8065433854907539
|
|
|
|
key: train_recall
|
|
value: [0.82941176 0.82647059 0.84705882 0.83823529 0.83235294 0.83823529
|
|
0.82697947 0.80938416 0.82352941 0.82941176]
|
|
|
|
mean value: 0.8301069518716577
|
|
|
|
key: test_roc_auc
|
|
value: [0.78947368 0.77631579 0.71052632 0.75 0.67105263 0.73684211
|
|
0.82681366 0.71906117 0.80014225 0.73150782]
|
|
|
|
mean value: 0.7511735419630157
|
|
|
|
key: train_roc_auc
|
|
value: [0.77647059 0.76323529 0.78235294 0.77647059 0.775 0.77941176
|
|
0.7679015 0.76057444 0.76513714 0.76661204]
|
|
|
|
mean value: 0.7713166292910126
|
|
|
|
key: test_jcc
|
|
value: [0.66666667 0.63043478 0.56862745 0.64150943 0.52830189 0.63636364
|
|
0.70454545 0.53333333 0.66666667 0.62264151]
|
|
|
|
mean value: 0.6199090821353525
|
|
|
|
key: train_jcc
|
|
value: [0.64976959 0.63574661 0.66055046 0.65217391 0.64908257 0.65517241
|
|
0.64090909 0.62870159 0.63636364 0.63945578]
|
|
|
|
mean value: 0.6447925650066497
|
|
|
|
MCC on Blind test: 0.34
|
|
|
|
Accuracy on Blind test: 0.68
|
|
|
|
Model_name: Passive Aggresive
|
|
Model func: PassiveAggressiveClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
PassiveAggressiveClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02843261 0.02742124 0.05048251 0.02719665 0.02355981 0.02463102
|
|
0.02420449 0.03631282 0.02709699 0.02420211]
|
|
|
|
mean value: 0.029354023933410644
|
|
|
|
key: score_time
|
|
value: [0.01219368 0.01236081 0.03369594 0.01354003 0.01395226 0.01351929
|
|
0.01246238 0.01241994 0.01227188 0.02727246]
|
|
|
|
mean value: 0.016368865966796875
|
|
|
|
key: test_mcc
|
|
value: [0.57735027 0.60715823 0.56881543 0.72111026 0.50156549 0.52631579
|
|
0.64116714 0.73328592 0.66274467 0.70857794]
|
|
|
|
mean value: 0.6248091133241859
|
|
|
|
key: train_mcc
|
|
value: [0.74052678 0.74906183 0.72858912 0.72224216 0.73530684 0.69865923
|
|
0.69418107 0.75375895 0.66338922 0.72603084]
|
|
|
|
mean value: 0.721174604371229
|
|
|
|
key: test_accuracy
|
|
value: [0.77631579 0.80263158 0.77631579 0.84210526 0.75 0.76315789
|
|
0.8 0.86666667 0.81333333 0.85333333]
|
|
|
|
mean value: 0.8043859649122808
|
|
|
|
key: train_accuracy
|
|
value: [0.85735294 0.87205882 0.86176471 0.84852941 0.86764706 0.84852941
|
|
0.82819383 0.87665198 0.82525698 0.85609398]
|
|
|
|
mean value: 0.8542079122397858
|
|
|
|
key: test_fscore
|
|
value: [0.8045977 0.79452055 0.8 0.86363636 0.75949367 0.76315789
|
|
0.82758621 0.86486486 0.78125 0.86075949]
|
|
|
|
mean value: 0.8119866743786215
|
|
|
|
key: train_fscore
|
|
value: [0.87386216 0.86427457 0.86944444 0.86605982 0.86803519 0.84322679
|
|
0.85245902 0.87896254 0.80587276 0.86863271]
|
|
|
|
mean value: 0.8590829988192777
|
|
|
|
key: test_precision
|
|
value: [0.71428571 0.82857143 0.72340426 0.76 0.73170732 0.76315789
|
|
0.72 0.86486486 0.96153846 0.82926829]
|
|
|
|
mean value: 0.7896798229072558
|
|
|
|
key: train_precision
|
|
value: [0.78321678 0.92026578 0.82368421 0.77622378 0.86549708 0.87381703
|
|
0.74778761 0.86402266 0.9047619 0.79802956]
|
|
|
|
mean value: 0.8357306396342619
|
|
|
|
key: test_recall
|
|
value: [0.92105263 0.76315789 0.89473684 1. 0.78947368 0.76315789
|
|
0.97297297 0.86486486 0.65789474 0.89473684]
|
|
|
|
mean value: 0.8522048364153627
|
|
|
|
key: train_recall
|
|
value: [0.98823529 0.81470588 0.92058824 0.97941176 0.87058824 0.81470588
|
|
0.99120235 0.89442815 0.72647059 0.95294118]
|
|
|
|
mean value: 0.8953277557357253
|
|
|
|
key: test_roc_auc
|
|
value: [0.77631579 0.80263158 0.77631579 0.84210526 0.75 0.76315789
|
|
0.80227596 0.86664296 0.81543385 0.85277383]
|
|
|
|
mean value: 0.8047652916073968
|
|
|
|
key: train_roc_auc
|
|
value: [0.85735294 0.87205882 0.86176471 0.84852941 0.86764706 0.84852941
|
|
0.82795411 0.87662584 0.82511213 0.85623598]
|
|
|
|
mean value: 0.8541810419182336
|
|
|
|
key: test_jcc
|
|
value: [0.67307692 0.65909091 0.66666667 0.76 0.6122449 0.61702128
|
|
0.70588235 0.76190476 0.64102564 0.75555556]
|
|
|
|
mean value: 0.6852468984816562
|
|
|
|
key: train_jcc
|
|
value: [0.77598152 0.76098901 0.76904177 0.76376147 0.76683938 0.72894737
|
|
0.74285714 0.7840617 0.67486339 0.76777251]
|
|
|
|
mean value: 0.753511525817123
|
|
|
|
MCC on Blind test: 0.35
|
|
|
|
Accuracy on Blind test: 0.69
|
|
|
|
Model_name: Stochastic GDescent
|
|
Model func: SGDClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SGDClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02292633 0.05192661 0.03333235 0.03678298 0.05591083 0.02576995
|
|
0.02964091 0.04471755 0.04296851 0.02038288]
|
|
|
|
mean value: 0.03643589019775391
|
|
|
|
key: score_time
|
|
value: [0.01329803 0.01247334 0.0191257 0.02639747 0.01451349 0.01193857
|
|
0.01925683 0.01915765 0.01207972 0.01106358]
|
|
|
|
mean value: 0.015930438041687013
|
|
|
|
key: test_mcc
|
|
value: [0.65812266 0.55282303 0.65875812 0.79388419 0.46737879 0.54554473
|
|
0.70857794 0.82825406 0.70648474 0.62775817]
|
|
|
|
mean value: 0.6547586409992989
|
|
|
|
key: train_mcc
|
|
value: [0.72991678 0.82655997 0.72597335 0.7559641 0.78207184 0.75666316
|
|
0.76874897 0.75011406 0.73446196 0.66990707]
|
|
|
|
mean value: 0.7500381262176113
|
|
|
|
key: test_accuracy
|
|
value: [0.82894737 0.77631579 0.80263158 0.89473684 0.72368421 0.76315789
|
|
0.85333333 0.90666667 0.84 0.81333333]
|
|
|
|
mean value: 0.8202807017543859
|
|
|
|
key: train_accuracy
|
|
value: [0.86323529 0.91323529 0.84852941 0.87794118 0.88382353 0.87352941
|
|
0.88399413 0.86637298 0.86343612 0.83406755]
|
|
|
|
mean value: 0.8708164895914313
|
|
|
|
key: test_fscore
|
|
value: [0.83116883 0.77922078 0.83516484 0.9 0.75862069 0.79069767
|
|
0.84507042 0.91358025 0.81818182 0.81081081]
|
|
|
|
mean value: 0.8282516108069643
|
|
|
|
key: train_fscore
|
|
value: [0.85625966 0.91259259 0.86709677 0.87883212 0.89395973 0.88283379
|
|
0.88155922 0.8794702 0.8526149 0.82748092]
|
|
|
|
mean value: 0.8732699894637856
|
|
|
|
key: test_precision
|
|
value: [0.82051282 0.76923077 0.71698113 0.85714286 0.67346939 0.70833333
|
|
0.88235294 0.84090909 0.96428571 0.83333333]
|
|
|
|
mean value: 0.8066551379754963
|
|
|
|
key: train_precision
|
|
value: [0.90228013 0.91940299 0.77241379 0.87246377 0.82222222 0.82233503
|
|
0.90184049 0.80193237 0.92439863 0.86031746]
|
|
|
|
mean value: 0.8599606867884427
|
|
|
|
key: test_recall
|
|
value: [0.84210526 0.78947368 1. 0.94736842 0.86842105 0.89473684
|
|
0.81081081 1. 0.71052632 0.78947368]
|
|
|
|
mean value: 0.8652916073968706
|
|
|
|
key: train_recall
|
|
value: [0.81470588 0.90588235 0.98823529 0.88529412 0.97941176 0.95294118
|
|
0.86217009 0.97360704 0.79117647 0.79705882]
|
|
|
|
mean value: 0.8950483008452648
|
|
|
|
key: test_roc_auc
|
|
value: [0.82894737 0.77631579 0.80263158 0.89473684 0.72368421 0.76315789
|
|
0.85277383 0.90789474 0.84174964 0.81365576]
|
|
|
|
mean value: 0.8205547652916074
|
|
|
|
key: train_roc_auc
|
|
value: [0.86323529 0.91323529 0.84852941 0.87794118 0.88382353 0.87352941
|
|
0.88402622 0.86621528 0.86333017 0.83401328]
|
|
|
|
mean value: 0.8707879075383819
|
|
|
|
key: test_jcc
|
|
value: [0.71111111 0.63829787 0.71698113 0.81818182 0.61111111 0.65384615
|
|
0.73170732 0.84090909 0.69230769 0.68181818]
|
|
|
|
mean value: 0.7096271480774228
|
|
|
|
key: train_jcc
|
|
value: [0.74864865 0.83923706 0.76537585 0.78385417 0.80825243 0.7902439
|
|
0.78820375 0.78486998 0.74309392 0.70572917]
|
|
|
|
mean value: 0.7757508875402782
|
|
|
|
MCC on Blind test: 0.19
|
|
|
|
Accuracy on Blind test: 0.6
|
|
|
|
Model_name: AdaBoost Classifier
|
|
Model func: AdaBoostClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', AdaBoostClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.25833273 0.2572515 0.25891519 0.25970793 0.25987625 0.26043081
|
|
0.25959468 0.25923753 0.25702286 0.26810098]
|
|
|
|
mean value: 0.2598470449447632
|
|
|
|
key: score_time
|
|
value: [0.01847601 0.01842475 0.0187037 0.01850677 0.01845002 0.01849556
|
|
0.01685953 0.01855373 0.01843381 0.01890397]
|
|
|
|
mean value: 0.01838078498840332
|
|
|
|
key: test_mcc
|
|
value: [0.78947368 0.71077247 0.84327404 0.8229444 0.8160721 0.78947368
|
|
0.86956721 0.76721166 0.84500776 0.73712493]
|
|
|
|
mean value: 0.7990921937987712
|
|
|
|
key: train_mcc
|
|
value: [0.94124161 0.93303521 0.95598573 0.93856417 0.93833676 0.92378517
|
|
0.9357899 0.93564506 0.94720272 0.93852606]
|
|
|
|
mean value: 0.9388112389119484
|
|
|
|
key: test_accuracy
|
|
value: [0.89473684 0.85526316 0.92105263 0.90789474 0.90789474 0.89473684
|
|
0.93333333 0.88 0.92 0.86666667]
|
|
|
|
mean value: 0.8981578947368422
|
|
|
|
key: train_accuracy
|
|
value: [0.97058824 0.96617647 0.97794118 0.96911765 0.96911765 0.96176471
|
|
0.96769457 0.96769457 0.97356828 0.969163 ]
|
|
|
|
mean value: 0.9692826293512999
|
|
|
|
key: test_fscore
|
|
value: [0.89473684 0.85714286 0.92307692 0.91358025 0.90909091 0.89473684
|
|
0.93506494 0.88607595 0.91666667 0.875 ]
|
|
|
|
mean value: 0.9005172171533486
|
|
|
|
key: train_fscore
|
|
value: [0.97076023 0.96681097 0.97810219 0.96952104 0.96934307 0.9622093
|
|
0.96820809 0.96811594 0.97368421 0.96943231]
|
|
|
|
mean value: 0.9696187362973203
|
|
|
|
key: test_precision
|
|
value: [0.89473684 0.84615385 0.9 0.86046512 0.8974359 0.89473684
|
|
0.9 0.83333333 0.97058824 0.83333333]
|
|
|
|
mean value: 0.8830783446040125
|
|
|
|
key: train_precision
|
|
value: [0.96511628 0.9490085 0.97101449 0.95702006 0.96231884 0.95114943
|
|
0.95441595 0.95702006 0.96802326 0.95965418]
|
|
|
|
mean value: 0.9594741039791467
|
|
|
|
key: test_recall
|
|
value: [0.89473684 0.86842105 0.94736842 0.97368421 0.92105263 0.89473684
|
|
0.97297297 0.94594595 0.86842105 0.92105263]
|
|
|
|
mean value: 0.9208392603129445
|
|
|
|
key: train_recall
|
|
value: [0.97647059 0.98529412 0.98529412 0.98235294 0.97647059 0.97352941
|
|
0.98240469 0.97947214 0.97941176 0.97941176]
|
|
|
|
mean value: 0.9800112126962222
|
|
|
|
key: test_roc_auc
|
|
value: [0.89473684 0.85526316 0.92105263 0.90789474 0.90789474 0.89473684
|
|
0.93385491 0.88086771 0.92069701 0.86593172]
|
|
|
|
mean value: 0.8982930298719772
|
|
|
|
key: train_roc_auc
|
|
value: [0.97058824 0.96617647 0.97794118 0.96911765 0.96911765 0.96176471
|
|
0.96767293 0.96767725 0.97357685 0.96917802]
|
|
|
|
mean value: 0.9692810936691393
|
|
|
|
key: test_jcc
|
|
value: [0.80952381 0.75 0.85714286 0.84090909 0.83333333 0.80952381
|
|
0.87804878 0.79545455 0.84615385 0.77777778]
|
|
|
|
mean value: 0.8197867850306875
|
|
|
|
key: train_jcc
|
|
value: [0.94318182 0.93575419 0.95714286 0.94084507 0.94050992 0.92717087
|
|
0.93837535 0.93820225 0.94871795 0.94067797]
|
|
|
|
mean value: 0.9410578231203559
|
|
|
|
MCC on Blind test: 0.59
|
|
|
|
Accuracy on Blind test: 0.8
|
|
|
|
Model_name: Bagging Classifier
|
|
Model func: BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
BaggingClassifier(n_jobs=10, oob_score=True,
|
|
random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.09461832 0.08192611 0.07765198 0.08942413 0.08597469 0.0818181
|
|
0.08593726 0.06795788 0.0869019 0.08571911]
|
|
|
|
mean value: 0.08379294872283935
|
|
|
|
key: score_time
|
|
value: [0.0243969 0.027354 0.02514482 0.02463293 0.0249722 0.02222848
|
|
0.02491212 0.02241611 0.02354074 0.0231061 ]
|
|
|
|
mean value: 0.02427043914794922
|
|
|
|
key: test_mcc
|
|
value: [0.8468098 0.8183437 0.84327404 0.8468098 0.86872191 0.78947368
|
|
0.89857968 0.85123569 0.84500776 0.76214986]
|
|
|
|
mean value: 0.8370405922008575
|
|
|
|
key: train_mcc
|
|
value: [0.98825239 0.97653817 0.98236994 0.98825239 0.99121506 0.9707394
|
|
0.97064785 0.97944609 0.99414337 0.98531999]
|
|
|
|
mean value: 0.9826924654129596
|
|
|
|
key: test_accuracy
|
|
value: [0.92105263 0.90789474 0.92105263 0.92105263 0.93421053 0.89473684
|
|
0.94666667 0.92 0.92 0.88 ]
|
|
|
|
mean value: 0.9166666666666666
|
|
|
|
key: train_accuracy
|
|
value: [0.99411765 0.98823529 0.99117647 0.99411765 0.99558824 0.98529412
|
|
0.98531571 0.989721 0.99706314 0.99265786]
|
|
|
|
mean value: 0.9913287121015808
|
|
|
|
key: test_fscore
|
|
value: [0.91666667 0.90410959 0.92307692 0.925 0.93506494 0.89473684
|
|
0.94871795 0.925 0.91666667 0.88607595]
|
|
|
|
mean value: 0.9175115520706588
|
|
|
|
key: train_fscore
|
|
value: [0.99410029 0.98816568 0.99115044 0.99410029 0.99556869 0.9851632
|
|
0.98538012 0.9897511 0.99705015 0.99265786]
|
|
|
|
mean value: 0.9913087821688488
|
|
|
|
key: test_precision
|
|
value: [0.97058824 0.94285714 0.9 0.88095238 0.92307692 0.89473684
|
|
0.90243902 0.86046512 0.97058824 0.85365854]
|
|
|
|
mean value: 0.9099362436834625
|
|
|
|
key: train_precision
|
|
value: [0.99704142 0.99404762 0.99408284 0.99704142 1. 0.99401198
|
|
0.98250729 0.98830409 1. 0.99120235]
|
|
|
|
mean value: 0.9938239003806941
|
|
|
|
key: test_recall
|
|
value: [0.86842105 0.86842105 0.94736842 0.97368421 0.94736842 0.89473684
|
|
1. 1. 0.86842105 0.92105263]
|
|
|
|
mean value: 0.9289473684210526
|
|
|
|
key: train_recall
|
|
value: [0.99117647 0.98235294 0.98823529 0.99117647 0.99117647 0.97647059
|
|
0.98826979 0.99120235 0.99411765 0.99411765]
|
|
|
|
mean value: 0.9888295670174229
|
|
|
|
key: test_roc_auc
|
|
value: [0.92105263 0.90789474 0.92105263 0.92105263 0.93421053 0.89473684
|
|
0.94736842 0.92105263 0.92069701 0.87944523]
|
|
|
|
mean value: 0.9168563300142247
|
|
|
|
key: train_roc_auc
|
|
value: [0.99411765 0.98823529 0.99117647 0.99411765 0.99558824 0.98529412
|
|
0.98531137 0.98971882 0.99705882 0.99266 ]
|
|
|
|
mean value: 0.9913278419872348
|
|
|
|
key: test_jcc
|
|
value: [0.84615385 0.825 0.85714286 0.86046512 0.87804878 0.80952381
|
|
0.90243902 0.86046512 0.84615385 0.79545455]
|
|
|
|
mean value: 0.8480846941865092
|
|
|
|
key: train_jcc
|
|
value: [0.98826979 0.97660819 0.98245614 0.98826979 0.99117647 0.97076023
|
|
0.97118156 0.97971014 0.99411765 0.98542274]
|
|
|
|
mean value: 0.9827972710141666
|
|
|
|
MCC on Blind test: 0.61
|
|
|
|
Accuracy on Blind test: 0.81
|
|
|
|
Model_name: Gaussian Process
|
|
Model func: GaussianProcessClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianProcessClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.28219652 0.48666406 0.58636403 0.52659178 0.50711131 0.49877977
|
|
0.61686635 0.48800898 0.58473396 0.57519126]
|
|
|
|
mean value: 0.5152508020401001
|
|
|
|
key: score_time
|
|
value: [0.02304101 0.04786062 0.03291726 0.04315662 0.06411171 0.04829597
|
|
0.05581355 0.03055406 0.05254436 0.07386899]
|
|
|
|
mean value: 0.04721641540527344
|
|
|
|
key: test_mcc
|
|
value: [0.53300179 0.5797509 0.58218174 0.65465367 0.55747847 0.31980107
|
|
0.55230643 0.52099657 0.63072008 0.57594601]
|
|
|
|
mean value: 0.5506836738295658
|
|
|
|
key: train_mcc
|
|
value: [0.92714855 0.93892187 0.95353526 0.92431333 0.94732109 0.93608802
|
|
0.92441873 0.93865144 0.92466808 0.93009663]
|
|
|
|
mean value: 0.9345163017575312
|
|
|
|
key: test_accuracy
|
|
value: [0.76315789 0.78947368 0.78947368 0.81578947 0.77631579 0.65789474
|
|
0.77333333 0.76 0.81333333 0.78666667]
|
|
|
|
mean value: 0.7725438596491228
|
|
|
|
key: train_accuracy
|
|
value: [0.96323529 0.96911765 0.97647059 0.96176471 0.97352941 0.96764706
|
|
0.96182085 0.969163 0.96182085 0.96475771]
|
|
|
|
mean value: 0.9669327114105554
|
|
|
|
key: test_fscore
|
|
value: [0.7804878 0.78378378 0.8 0.8372093 0.79012346 0.68292683
|
|
0.78481013 0.76315789 0.80555556 0.8 ]
|
|
|
|
mean value: 0.7828054753920506
|
|
|
|
key: train_fscore
|
|
value: [0.96392496 0.96969697 0.97687861 0.96253602 0.97383721 0.96829971
|
|
0.96264368 0.96960926 0.96264368 0.96531792]
|
|
|
|
mean value: 0.9675388027847541
|
|
|
|
key: test_precision
|
|
value: [0.72727273 0.80555556 0.76190476 0.75 0.74418605 0.63636364
|
|
0.73809524 0.74358974 0.85294118 0.76190476]
|
|
|
|
mean value: 0.752181364766864
|
|
|
|
key: train_precision
|
|
value: [0.94617564 0.95184136 0.96022727 0.94350282 0.96264368 0.94915254
|
|
0.94366197 0.95714286 0.94101124 0.94886364]
|
|
|
|
mean value: 0.9504223016579505
|
|
|
|
key: test_recall
|
|
value: [0.84210526 0.76315789 0.84210526 0.94736842 0.84210526 0.73684211
|
|
0.83783784 0.78378378 0.76315789 0.84210526]
|
|
|
|
mean value: 0.8200568990042674
|
|
|
|
key: train_recall
|
|
value: [0.98235294 0.98823529 0.99411765 0.98235294 0.98529412 0.98823529
|
|
0.98240469 0.98240469 0.98529412 0.98235294]
|
|
|
|
mean value: 0.985304467828187
|
|
|
|
key: test_roc_auc
|
|
value: [0.76315789 0.78947368 0.78947368 0.81578947 0.77631579 0.65789474
|
|
0.77418208 0.76031294 0.81401138 0.7859175 ]
|
|
|
|
mean value: 0.7726529160739687
|
|
|
|
key: train_roc_auc
|
|
value: [0.96323529 0.96911765 0.97647059 0.96176471 0.97352941 0.96764706
|
|
0.96179058 0.96914352 0.96185527 0.96478351]
|
|
|
|
mean value: 0.9669337588407797
|
|
|
|
key: test_jcc
|
|
value: [0.64 0.64444444 0.66666667 0.72 0.65306122 0.51851852
|
|
0.64583333 0.61702128 0.6744186 0.66666667]
|
|
|
|
mean value: 0.6446630735366333
|
|
|
|
key: train_jcc
|
|
value: [0.93036212 0.94117647 0.95480226 0.92777778 0.9490085 0.93854749
|
|
0.92797784 0.94101124 0.92797784 0.93296089]
|
|
|
|
mean value: 0.9371602418341916
|
|
|
|
MCC on Blind test: 0.28
|
|
|
|
Accuracy on Blind test: 0.65
|
|
|
|
Model_name: Gradient Boosting
|
|
Model func: GradientBoostingClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GradientBoostingClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.07470703 1.07676768 1.07605433 1.08124733 1.07994008 1.08504128
|
|
1.07696247 1.08201265 1.07295084 1.07926488]
|
|
|
|
mean value: 1.0784948587417602
|
|
|
|
key: score_time
|
|
value: [0.0112896 0.0110414 0.01100779 0.01101232 0.01121712 0.01104212
|
|
0.01099777 0.01103234 0.0112071 0.01103258]
|
|
|
|
mean value: 0.011088013648986816
|
|
|
|
key: test_mcc
|
|
value: [0.87114007 0.8183437 0.84327404 0.79388419 0.89597867 0.86872191
|
|
0.86956721 0.82825406 0.86956721 0.7875998 ]
|
|
|
|
mean value: 0.8446330848874422
|
|
|
|
key: train_mcc
|
|
value: [0.98236994 0.979416 0.98529838 0.979416 0.979416 0.97647059
|
|
0.97948024 0.97944627 0.97357254 0.98237882]
|
|
|
|
mean value: 0.9797264762477649
|
|
|
|
key: test_accuracy
|
|
value: [0.93421053 0.90789474 0.92105263 0.89473684 0.94736842 0.93421053
|
|
0.93333333 0.90666667 0.93333333 0.89333333]
|
|
|
|
mean value: 0.9206140350877193
|
|
|
|
key: train_accuracy
|
|
value: [0.99117647 0.98970588 0.99264706 0.98970588 0.98970588 0.98823529
|
|
0.989721 0.989721 0.98678414 0.99118943]
|
|
|
|
mean value: 0.9898592035933317
|
|
|
|
key: test_fscore
|
|
value: [0.93150685 0.90410959 0.92307692 0.9 0.94871795 0.93506494
|
|
0.93506494 0.91358025 0.93150685 0.8974359 ]
|
|
|
|
mean value: 0.9220064173945453
|
|
|
|
key: train_fscore
|
|
value: [0.99120235 0.98969072 0.99263623 0.989721 0.98969072 0.98823529
|
|
0.98969072 0.989721 0.98678414 0.99117647]
|
|
|
|
mean value: 0.9898548643477328
|
|
|
|
key: test_precision
|
|
value: [0.97142857 0.94285714 0.9 0.85714286 0.925 0.92307692
|
|
0.9 0.84090909 0.97142857 0.875 ]
|
|
|
|
mean value: 0.9106843156843157
|
|
|
|
key: train_precision
|
|
value: [0.98830409 0.99115044 0.99410029 0.98826979 0.99115044 0.98823529
|
|
0.99408284 0.99117647 0.98533724 0.99117647]
|
|
|
|
mean value: 0.9902983387162225
|
|
|
|
key: test_recall
|
|
value: [0.89473684 0.86842105 0.94736842 0.94736842 0.97368421 0.94736842
|
|
0.97297297 1. 0.89473684 0.92105263]
|
|
|
|
mean value: 0.9367709815078236
|
|
|
|
key: train_recall
|
|
value: [0.99411765 0.98823529 0.99117647 0.99117647 0.98823529 0.98823529
|
|
0.98533724 0.98826979 0.98823529 0.99117647]
|
|
|
|
mean value: 0.9894195273417286
|
|
|
|
key: test_roc_auc
|
|
value: [0.93421053 0.90789474 0.92105263 0.89473684 0.94736842 0.93421053
|
|
0.93385491 0.90789474 0.93385491 0.89295875]
|
|
|
|
mean value: 0.9208036984352773
|
|
|
|
key: train_roc_auc
|
|
value: [0.99117647 0.98970588 0.99264706 0.98970588 0.98970588 0.98823529
|
|
0.98972745 0.98972313 0.98678627 0.99118941]
|
|
|
|
mean value: 0.9898602725547698
|
|
|
|
key: test_jcc
|
|
value: [0.87179487 0.825 0.85714286 0.81818182 0.90243902 0.87804878
|
|
0.87804878 0.84090909 0.87179487 0.81395349]
|
|
|
|
mean value: 0.8557313583561457
|
|
|
|
key: train_jcc
|
|
value: [0.98255814 0.97959184 0.98538012 0.97965116 0.97959184 0.97674419
|
|
0.97959184 0.97965116 0.97391304 0.98250729]
|
|
|
|
mean value: 0.9799180610433935
|
|
|
|
MCC on Blind test: 0.68
|
|
|
|
Accuracy on Blind test: 0.84
|
|
|
|
Model_name: QDA
|
|
Model func: QuadraticDiscriminantAnalysis()
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', QuadraticDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.0625093 0.05630112 0.13917351 0.13230419 0.1321888 0.18530345
|
|
0.12923551 0.08270359 0.09576845 0.1563518 ]
|
|
|
|
mean value: 0.1171839714050293
|
|
|
|
key: score_time
|
|
value: [0.01461554 0.01291466 0.01728034 0.0169518 0.02212596 0.0203948
|
|
0.01974607 0.02070308 0.0650835 0.01618576]
|
|
|
|
mean value: 0.0226001501083374
|
|
|
|
key: test_mcc
|
|
value: [0.28867513 0.21650635 0. 0.36650833 0. 0.20361574
|
|
0.19267462 0.28231492 0.14179531 0.26379128]
|
|
|
|
mean value: 0.1955881671621231
|
|
|
|
key: train_mcc
|
|
value: [0.30823376 0.2937039 0.33605003 0.29664794 0.33333333 0.32785951
|
|
0.30556788 0.31412811 0.31320691 0.31320691]
|
|
|
|
mean value: 0.31419382967467313
|
|
|
|
key: test_accuracy
|
|
value: [0.60526316 0.57894737 0.5 0.61842105 0.5 0.56578947
|
|
0.54666667 0.58666667 0.54666667 0.58666667]
|
|
|
|
mean value: 0.5635087719298245
|
|
|
|
key: train_accuracy
|
|
value: [0.58676471 0.57941176 0.60147059 0.58088235 0.6 0.59705882
|
|
0.58590308 0.59030837 0.58883994 0.58883994]
|
|
|
|
mean value: 0.5899479571564309
|
|
|
|
key: test_fscore
|
|
value: [0.70588235 0.68627451 0.64150943 0.72380952 0.65454545 0.68571429
|
|
0.67924528 0.69902913 0.67924528 0.7047619 ]
|
|
|
|
mean value: 0.6860017157789859
|
|
|
|
key: train_fscore
|
|
value: [0.70759625 0.70393375 0.7150368 0.70466321 0.71428571 0.71278826
|
|
0.70746888 0.70967742 0.70833333 0.70833333]
|
|
|
|
mean value: 0.7092116957047646
|
|
|
|
key: test_precision
|
|
value: [0.5625 0.546875 0.5 0.56716418 0.5 0.53731343
|
|
0.52173913 0.54545455 0.52941176 0.55223881]
|
|
|
|
mean value: 0.5362696858505658
|
|
|
|
key: train_precision
|
|
value: [0.54750403 0.54313099 0.55646481 0.544 0.55555556 0.55374593
|
|
0.54735152 0.55 0.5483871 0.5483871 ]
|
|
|
|
mean value: 0.5494527030286511
|
|
|
|
key: test_recall
|
|
value: [0.94736842 0.92105263 0.89473684 1. 0.94736842 0.94736842
|
|
0.97297297 0.97297297 0.94736842 0.97368421]
|
|
|
|
mean value: 0.9524893314366999
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.60526316 0.57894737 0.5 0.61842105 0.5 0.56578947
|
|
0.55227596 0.59174964 0.54125178 0.5814367 ]
|
|
|
|
mean value: 0.5635135135135135
|
|
|
|
key: train_roc_auc
|
|
value: [0.58676471 0.57941176 0.60147059 0.58088235 0.6 0.59705882
|
|
0.58529412 0.58970588 0.58944282 0.58944282]
|
|
|
|
mean value: 0.589947386579265
|
|
|
|
key: test_jcc
|
|
value: [0.54545455 0.52238806 0.47222222 0.56716418 0.48648649 0.52173913
|
|
0.51428571 0.53731343 0.51428571 0.54411765]
|
|
|
|
mean value: 0.5225457131870079
|
|
|
|
key: train_jcc
|
|
value: [0.54750403 0.54313099 0.55646481 0.544 0.55555556 0.55374593
|
|
0.54735152 0.55 0.5483871 0.5483871 ]
|
|
|
|
mean value: 0.5494527030286511
|
|
|
|
MCC on Blind test: 0.06
|
|
|
|
Accuracy on Blind test: 0.47
|
|
|
|
Model_name: Ridge Classifier
|
|
Model func: RidgeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.08026552 0.07109356 0.05684352 0.0584805 0.05514002 0.05477428
|
|
0.05650496 0.08210063 0.0794189 0.08002782]
|
|
|
|
mean value: 0.06746497154235839
|
|
|
|
key: score_time
|
|
value: [0.06043243 0.04529953 0.03625798 0.03986263 0.0336926 0.04159045
|
|
0.04100561 0.03788924 0.03786874 0.0377779 ]
|
|
|
|
mean value: 0.041167712211608885
|
|
|
|
key: test_mcc
|
|
value: [0.63510735 0.60715823 0.74620251 0.7228974 0.58630197 0.58630197
|
|
0.62660028 0.84500776 0.7626532 0.65500602]
|
|
|
|
mean value: 0.6773236685854388
|
|
|
|
key: train_mcc
|
|
value: [0.75761782 0.79299297 0.77911707 0.75946807 0.77598004 0.76811548
|
|
0.78844765 0.77251734 0.75985095 0.77081681]
|
|
|
|
mean value: 0.7724924202081385
|
|
|
|
key: test_accuracy
|
|
value: [0.81578947 0.80263158 0.86842105 0.85526316 0.78947368 0.78947368
|
|
0.81333333 0.92 0.88 0.82666667]
|
|
|
|
mean value: 0.8361052631578948
|
|
|
|
key: train_accuracy
|
|
value: [0.87794118 0.89558824 0.88823529 0.87794118 0.88676471 0.88235294
|
|
0.8928047 0.88546256 0.87812041 0.88399413]
|
|
|
|
mean value: 0.8849205320894877
|
|
|
|
key: test_fscore
|
|
value: [0.825 0.81012658 0.87804878 0.86746988 0.80487805 0.80487805
|
|
0.81081081 0.92307692 0.87671233 0.83544304]
|
|
|
|
mean value: 0.8436444440474875
|
|
|
|
key: train_fscore
|
|
value: [0.88193457 0.89900427 0.89265537 0.88359046 0.89108911 0.88764045
|
|
0.89732771 0.88920455 0.88359046 0.88857546]
|
|
|
|
mean value: 0.889461239611828
|
|
|
|
key: test_precision
|
|
value: [0.78571429 0.7804878 0.81818182 0.8 0.75 0.75
|
|
0.81081081 0.87804878 0.91428571 0.80487805]
|
|
|
|
mean value: 0.8092407263138971
|
|
|
|
key: train_precision
|
|
value: [0.85399449 0.87052342 0.85869565 0.84450402 0.85831063 0.84946237
|
|
0.86216216 0.86225895 0.84450402 0.85365854]
|
|
|
|
mean value: 0.8558074245615411
|
|
|
|
key: test_recall
|
|
value: [0.86842105 0.84210526 0.94736842 0.94736842 0.86842105 0.86842105
|
|
0.81081081 0.97297297 0.84210526 0.86842105]
|
|
|
|
mean value: 0.8836415362731153
|
|
|
|
key: train_recall
|
|
value: [0.91176471 0.92941176 0.92941176 0.92647059 0.92647059 0.92941176
|
|
0.93548387 0.91788856 0.92647059 0.92647059]
|
|
|
|
mean value: 0.9259254786958772
|
|
|
|
key: test_roc_auc
|
|
value: [0.81578947 0.80263158 0.86842105 0.85526316 0.78947368 0.78947368
|
|
0.81330014 0.92069701 0.88051209 0.82610242]
|
|
|
|
mean value: 0.8361664295874822
|
|
|
|
key: train_roc_auc
|
|
value: [0.87794118 0.89558824 0.88823529 0.87794118 0.88676471 0.88235294
|
|
0.89274194 0.88541487 0.87819131 0.88405641]
|
|
|
|
mean value: 0.8849228048990857
|
|
|
|
key: test_jcc
|
|
value: [0.70212766 0.68085106 0.7826087 0.76595745 0.67346939 0.67346939
|
|
0.68181818 0.85714286 0.7804878 0.7173913 ]
|
|
|
|
mean value: 0.7315323789562058
|
|
|
|
key: train_jcc
|
|
value: [0.78880407 0.81653747 0.80612245 0.79145729 0.80357143 0.7979798
|
|
0.81377551 0.80051151 0.79145729 0.79949239]
|
|
|
|
mean value: 0.8009709192284509
|
|
|
|
MCC on Blind test: 0.4
|
|
|
|
Accuracy on Blind test: 0.71
|
|
|
|
Model_name: Ridge ClassifierCV
|
|
Model func: RidgeClassifierCV(cv=10)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifierCV(cv=10))])
|
|
|
|
key: fit_time
|
|
value: [0.60972524 0.72298741 0.55905247 0.54508948 0.53112173 0.59377766
|
|
0.57462525 0.43055153 0.44217324 0.42025185]
|
|
|
|
mean value: 0.542935585975647
|
|
|
|
key: score_time
|
|
value: [0.02643824 0.03537893 0.02685165 0.0371201 0.02021074 0.03413725
|
|
0.02915049 0.0235014 0.02404976 0.02640557]
|
|
|
|
mean value: 0.028324413299560546
|
|
|
|
key: test_mcc
|
|
value: [0.63510735 0.60715823 0.74620251 0.7228974 0.56225353 0.58630197
|
|
0.62660028 0.80568158 0.7626532 0.62967232]
|
|
|
|
mean value: 0.6684528365499358
|
|
|
|
key: train_mcc
|
|
value: [0.75761782 0.79299297 0.77911707 0.80824904 0.80629962 0.76811548
|
|
0.81127337 0.79551607 0.75985095 0.81654848]
|
|
|
|
mean value: 0.7895580866958624
|
|
|
|
key: test_accuracy
|
|
value: [0.81578947 0.80263158 0.86842105 0.85526316 0.77631579 0.78947368
|
|
0.81333333 0.89333333 0.88 0.81333333]
|
|
|
|
mean value: 0.8307894736842105
|
|
|
|
key: train_accuracy
|
|
value: [0.87794118 0.89558824 0.88823529 0.90294118 0.90147059 0.88235294
|
|
0.90455213 0.89720999 0.87812041 0.90748899]
|
|
|
|
mean value: 0.893590092424635
|
|
|
|
key: test_fscore
|
|
value: [0.825 0.81012658 0.87804878 0.86746988 0.79518072 0.80487805
|
|
0.81081081 0.90243902 0.87671233 0.825 ]
|
|
|
|
mean value: 0.839566617792459
|
|
|
|
key: train_fscore
|
|
value: /home/tanu/git/LSHTM_analysis/scripts/ml/./katg_cd_8020.py:196: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
rouC_CT.sort_values(by = ['test_mcc'], ascending = False, inplace = True)
|
|
/home/tanu/git/LSHTM_analysis/scripts/ml/./katg_cd_8020.py:199: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
rouC_BT.sort_values(by = ['bts_mcc'], ascending = False, inplace = True)
|
|
[0.88193457 0.89900427 0.89265537 0.90651558 0.90576653 0.88764045
|
|
0.90806223 0.9 0.88359046 0.91012839]
|
|
|
|
mean value: 0.8975297842641614
|
|
|
|
key: test_precision
|
|
value: [0.78571429 0.7804878 0.81818182 0.8 0.73333333 0.75
|
|
0.81081081 0.82222222 0.91428571 0.78571429]
|
|
|
|
mean value: 0.8000750275140519
|
|
|
|
key: train_precision
|
|
value: [0.85399449 0.87052342 0.85869565 0.87431694 0.86792453 0.84946237
|
|
0.87704918 0.87743733 0.84450402 0.88365651]
|
|
|
|
mean value: 0.8657564429670169
|
|
|
|
key: test_recall
|
|
value: [0.86842105 0.84210526 0.94736842 0.94736842 0.86842105 0.86842105
|
|
0.81081081 1. 0.84210526 0.86842105]
|
|
|
|
mean value: 0.8863442389758179
|
|
|
|
key: train_recall
|
|
value: [0.91176471 0.92941176 0.92941176 0.94117647 0.94705882 0.92941176
|
|
0.94134897 0.92375367 0.92647059 0.93823529]
|
|
|
|
mean value: 0.9318043815766776
|
|
|
|
key: test_roc_auc
|
|
value: [0.81578947 0.80263158 0.86842105 0.85526316 0.77631579 0.78947368
|
|
0.81330014 0.89473684 0.88051209 0.8125889 ]
|
|
|
|
mean value: 0.8309032716927454
|
|
|
|
key: train_roc_auc
|
|
value: [0.87794118 0.89558824 0.88823529 0.90294118 0.90147059 0.88235294
|
|
0.90449802 0.89717095 0.87819131 0.90753407]
|
|
|
|
mean value: 0.8935923753665689
|
|
|
|
key: test_jcc
|
|
value: [0.70212766 0.68085106 0.7826087 0.76595745 0.66 0.67346939
|
|
0.68181818 0.82222222 0.7804878 0.70212766]
|
|
|
|
mean value: 0.7251670122112963
|
|
|
|
key: train_jcc
|
|
value: [0.78880407 0.81653747 0.80612245 0.82901554 0.8277635 0.7979798
|
|
0.83160622 0.81818182 0.79145729 0.83507853]
|
|
|
|
mean value: 0.8142546682353851
|
|
|
|
MCC on Blind test: 0.36
|
|
|
|
Accuracy on Blind test: 0.69
|