19916 lines
989 KiB
Text
19916 lines
989 KiB
Text
/home/tanu/git/LSHTM_analysis/scripts/ml/ml_data_cd_7030.py:548: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
mask_check.sort_values(by = ['ligand_distance'], ascending = True, inplace = True)
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/xgboost/compat.py:36: FutureWarning: pandas.Int64Index is deprecated and will be removed from pandas in a future version. Use pandas.Index with the appropriate dtype instead.
|
|
from pandas import MultiIndex, Int64Index
|
|
1.22.4
|
|
1.4.1
|
|
|
|
aaindex_df contains non-numerical data
|
|
|
|
Total no. of non-numerial columns: 2
|
|
|
|
Selecting numerical data only
|
|
|
|
PASS: successfully selected numerical columns only for aaindex_df
|
|
|
|
Now checking for NA in the remaining aaindex_cols
|
|
|
|
Counting aaindex_df cols with NA
|
|
ncols with NA: 4 columns
|
|
Dropping these...
|
|
Original ncols: 127
|
|
|
|
Revised df ncols: 123
|
|
|
|
Checking NA in revised df...
|
|
|
|
PASS: cols with NA successfully dropped from aaindex_df
|
|
Proceeding with combining aa_df with other features_df
|
|
|
|
PASS: ncols match
|
|
Expected ncols: 123
|
|
Got: 123
|
|
|
|
Total no. of columns in clean aa_df: 123
|
|
|
|
Proceeding to merge, expected nrows in merged_df: 817
|
|
|
|
PASS: my_features_df and aa_df successfully combined
|
|
nrows: 817
|
|
ncols: 269
|
|
count of NULL values before imputation
|
|
|
|
or_mychisq 244
|
|
log10_or_mychisq 244
|
|
dtype: int64
|
|
count of NULL values AFTER imputation
|
|
|
|
mutationinformation 0
|
|
or_rawI 0
|
|
logorI 0
|
|
dtype: int64
|
|
|
|
PASS: OR values imputed, data ready for ML
|
|
|
|
Total no. of features for aaindex: 123
|
|
|
|
No. of numerical features: 168
|
|
No. of categorical features: 7
|
|
|
|
PASS: x_features has no target variable
|
|
|
|
No. of columns for x_features: 175
|
|
|
|
-------------------------------------------------------------
|
|
Successfully split data with stratification [COMPLETE data]: 70/30
|
|
Original data size: (817, 175)
|
|
Train data size: (547, 175)
|
|
Test data size: (270, 175)
|
|
y_train numbers: Counter({0: 317, 1: 230})
|
|
y_train ratio: 1.3782608695652174
|
|
|
|
y_test_numbers: Counter({0: 156, 1: 114})
|
|
y_test ratio: 1.368421052631579
|
|
-------------------------------------------------------------
|
|
|
|
index: 0
|
|
ind: 1
|
|
|
|
Mask count check: True
|
|
|
|
index: 1
|
|
ind: 2
|
|
|
|
Mask count check: True
|
|
Original Data
|
|
Counter({0: 317, 1: 230}) Data dim: (547, 175)
|
|
|
|
Simple Random OverSampling
|
|
Counter({1: 317, 0: 317})
|
|
(634, 175)
|
|
|
|
Simple Random UnderSampling
|
|
Counter({0: 230, 1: 230})
|
|
(460, 175)
|
|
|
|
Simple Combined Over and UnderSampling
|
|
Counter({0: 317, 1: 317})
|
|
(634, 175)
|
|
|
|
SMOTE_NC OverSampling
|
|
Counter({1: 317, 0: 317})
|
|
(634, 175)
|
|
|
|
#####################################################################
|
|
|
|
Running ML analysis [COMPLETE DATA]: 70/30 split
|
|
Gene name: katG
|
|
Drug name: isoniazid
|
|
|
|
Output directory: /home/tanu/git/Data/isoniazid/output/ml/tts_cd_7030/
|
|
|
|
Sanity checks:
|
|
Total input features: 175
|
|
|
|
Training data size: (547, 175)
|
|
Test data size: (270, 175)
|
|
|
|
Target feature numbers (training data): Counter({0: 317, 1: 230})
|
|
Target features ratio (training data: 1.3782608695652174
|
|
|
|
Target feature numbers (test data): Counter({0: 156, 1: 114})
|
|
Target features ratio (test data): 1.368421052631579
|
|
|
|
#####################################################################
|
|
|
|
|
|
================================================================
|
|
|
|
Strucutral features (n): 36
|
|
These are:
|
|
Common stablity features: ['ligand_distance', 'ligand_affinity_change', 'duet_stability_change', 'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'mcsm_ppi2_affinity', 'interface_dist']
|
|
FoldX columns: ['electro_rr', 'electro_mm', 'electro_sm', 'electro_ss', 'disulfide_rr', 'disulfide_mm', 'disulfide_sm', 'disulfide_ss', 'hbonds_rr', 'hbonds_mm', 'hbonds_sm', 'hbonds_ss', 'partcov_rr', 'partcov_mm', 'partcov_sm', 'partcov_ss', 'vdwclashes_rr', 'vdwclashes_mm', 'vdwclashes_sm', 'vdwclashes_ss', 'volumetric_rr', 'volumetric_mm', 'volumetric_ss']
|
|
Other struc columns: ['rsa', 'kd_values', 'rd_values']
|
|
================================================================
|
|
|
|
AAindex features (n): 123
|
|
These are:
|
|
['ALTS910101', 'AZAE970101', 'AZAE970102', 'BASU010101', 'BENS940101', 'BENS940102', 'BENS940103', 'BENS940104', 'BETM990101', 'BLAJ010101', 'BONM030101', 'BONM030102', 'BONM030103', 'BONM030104', 'BONM030105', 'BONM030106', 'BRYS930101', 'CROG050101', 'CSEM940101', 'DAYM780301', 'DAYM780302', 'DOSZ010101', 'DOSZ010102', 'DOSZ010103', 'DOSZ010104', 'FEND850101', 'FITW660101', 'GEOD900101', 'GIAG010101', 'GONG920101', 'GRAR740104', 'HENS920101', 'HENS920102', 'HENS920103', 'HENS920104', 'JOHM930101', 'JOND920103', 'JOND940101', 'KANM000101', 'KAPO950101', 'KESO980101', 'KESO980102', 'KOLA920101', 'KOLA930101', 'KOSJ950100_RSA_SST', 'KOSJ950100_SST', 'KOSJ950110_RSA', 'KOSJ950115', 'LEVJ860101', 'LINK010101', 'LIWA970101', 'LUTR910101', 'LUTR910102', 'LUTR910103', 'LUTR910104', 'LUTR910105', 'LUTR910106', 'LUTR910107', 'LUTR910108', 'LUTR910109', 'MCLA710101', 'MCLA720101', 'MEHP950102', 'MICC010101', 'MIRL960101', 'MIYS850102', 'MIYS850103', 'MIYS930101', 'MIYS960101', 'MIYS960102', 'MIYS960103', 'MIYS990106', 'MIYS990107', 'MIYT790101', 'MOHR870101', 'MOOG990101', 'MUET010101', 'MUET020101', 'MUET020102', 'NAOD960101', 'NGPC000101', 'NIEK910101', 'NIEK910102', 'OGAK980101', 'OVEJ920100_RSA', 'OVEJ920101', 'OVEJ920102', 'OVEJ920103', 'PRLA000101', 'PRLA000102', 'QUIB020101', 'QU_C930101', 'QU_C930102', 'QU_C930103', 'RIER950101', 'RISJ880101', 'RUSR970101', 'RUSR970102', 'RUSR970103', 'SIMK990101', 'SIMK990102', 'SIMK990103', 'SIMK990104', 'SIMK990105', 'SKOJ000101', 'SKOJ000102', 'SKOJ970101', 'TANS760101', 'TANS760102', 'THOP960101', 'TOBD000101', 'TOBD000102', 'TUDE900101', 'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101', 'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106']
|
|
================================================================
|
|
|
|
Evolutionary features (n): 3
|
|
These are:
|
|
['consurf_score', 'snap2_score', 'provean_score']
|
|
================================================================
|
|
|
|
Genomic features (n): 6
|
|
These are:
|
|
['maf', 'logorI']
|
|
['lineage_proportion', 'dist_lineage_proportion', 'lineage_count_all', 'lineage_count_unique']
|
|
================================================================
|
|
|
|
Categorical features (n): 7
|
|
These are:
|
|
['ss_class', 'aa_prop_change', 'electrostatics_change', 'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site']
|
|
================================================================
|
|
|
|
|
|
Pass: No. of features match
|
|
|
|
#####################################################################
|
|
|
|
|
|
Model_name: Logistic Regression
|
|
Model func: LogisticRegression(random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegression(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.04179597 0.03566599 0.0486362 0.03750038 0.06210589 0.05623746
|
|
0.03859067 0.03731346 0.03781509 0.03734136]
|
|
|
|
mean value: 0.04330024719238281
|
|
|
|
key: score_time
|
|
value: [0.01308751 0.01230502 0.01895547 0.01553655 0.02064085 0.01912975
|
|
0.01783442 0.01562071 0.01579142 0.01538372]
|
|
|
|
mean value: 0.016428542137145997
|
|
|
|
key: test_mcc
|
|
value: [0.58514212 0.51163988 0.42094935 0.56841568 0.66622595 0.48454371
|
|
0.48270989 0.56175441 0.70238885 0.61883928]
|
|
|
|
mean value: 0.5602609133851035
|
|
|
|
key: train_mcc
|
|
value: [0.68269604 0.70811086 0.70414271 0.69939808 0.68309179 0.72955341
|
|
0.69675461 0.75489046 0.69671044 0.70063657]
|
|
|
|
mean value: 0.7055984965434541
|
|
|
|
key: test_accuracy
|
|
value: [0.8 0.76363636 0.70909091 0.78181818 0.83636364 0.74545455
|
|
0.74545455 0.77777778 0.85185185 0.81481481]
|
|
|
|
mean value: 0.7826262626262627
|
|
|
|
key: train_accuracy
|
|
value: [0.84552846 0.85772358 0.85569106 0.85365854 0.84552846 0.86788618
|
|
0.85162602 0.88032454 0.85192698 0.85395538]
|
|
|
|
mean value: 0.8563849172974488
|
|
|
|
key: test_fscore
|
|
value: [0.74418605 0.71111111 0.68 0.76 0.80851064 0.70833333
|
|
0.61111111 0.76 0.83333333 0.77272727]
|
|
|
|
mean value: 0.7389312846425662
|
|
|
|
key: train_fscore
|
|
value: [0.81553398 0.83091787 0.82891566 0.82524272 0.81642512 0.8441247
|
|
0.82577566 0.85851319 0.82494005 0.82692308]
|
|
|
|
mean value: 0.829731202774635
|
|
|
|
key: test_precision
|
|
value: [0.8 0.72727273 0.62962963 0.7037037 0.79166667 0.68
|
|
0.84615385 0.7037037 0.8 0.80952381]
|
|
|
|
mean value: 0.7491654086654087
|
|
|
|
key: train_precision
|
|
value: [0.8195122 0.83091787 0.82692308 0.82926829 0.81642512 0.83809524
|
|
0.81603774 0.85238095 0.81904762 0.82296651]
|
|
|
|
mean value: 0.8271574612446937
|
|
|
|
key: test_recall
|
|
value: [0.69565217 0.69565217 0.73913043 0.82608696 0.82608696 0.73913043
|
|
0.47826087 0.82608696 0.86956522 0.73913043]
|
|
|
|
mean value: 0.7434782608695651
|
|
|
|
key: train_recall
|
|
value: [0.8115942 0.83091787 0.83091787 0.82125604 0.81642512 0.85024155
|
|
0.83574879 0.8647343 0.83091787 0.83091787]
|
|
|
|
mean value: 0.8323671497584542
|
|
|
|
key: test_roc_auc
|
|
value: [0.78532609 0.75407609 0.71331522 0.78804348 0.83491848 0.74456522
|
|
0.70788043 0.78401122 0.85413745 0.80504909]
|
|
|
|
mean value: 0.7771322755960729
|
|
|
|
key: train_roc_auc
|
|
value: [0.84088482 0.85405543 0.85230104 0.84922451 0.84154589 0.86547165
|
|
0.84945334 0.87817135 0.84902537 0.85077362]
|
|
|
|
mean value: 0.8530907028389867
|
|
|
|
key: test_jcc
|
|
value: [0.59259259 0.55172414 0.51515152 0.61290323 0.67857143 0.5483871
|
|
0.44 0.61290323 0.71428571 0.62962963]
|
|
|
|
mean value: 0.5896148566549011
|
|
|
|
key: train_jcc
|
|
value: [0.68852459 0.7107438 0.70781893 0.70247934 0.68979592 0.73029046
|
|
0.70325203 0.75210084 0.70204082 0.70491803]
|
|
|
|
mean value: 0.7091964757469712
|
|
|
|
MCC on Blind test: 0.46
|
|
|
|
Accuracy on Blind test: 0.74
|
|
|
|
Model_name: Logistic RegressionCV
|
|
Model func: LogisticRegressionCV(random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegressionCV(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.84767032 0.98310471 0.90616822 0.91747785 1.05338669 1.27765465
|
|
1.05463815 0.8606627 1.05289483 0.91209054]
|
|
|
|
mean value: 0.9865748643875122
|
|
|
|
key: score_time
|
|
value: [0.01637244 0.01902914 0.01560688 0.01560307 0.01704359 0.01559401
|
|
0.01545811 0.01693511 0.01557612 0.01548362]
|
|
|
|
mean value: 0.016270208358764648
|
|
|
|
key: test_mcc
|
|
value: [0.58703744 0.70108696 0.50741958 0.67387468 0.70662625 0.61131498
|
|
0.51203338 0.62728193 0.66155709 0.65775818]
|
|
|
|
mean value: 0.6245990450015361
|
|
|
|
key: train_mcc
|
|
value: [0.86303482 0.83412421 0.85822527 0.82876173 0.82943939 0.8580675
|
|
0.85653699 0.85096756 0.82173265 0.87508868]
|
|
|
|
mean value: 0.8475978801436712
|
|
|
|
key: test_accuracy
|
|
value: [0.8 0.85454545 0.74545455 0.83636364 0.85454545 0.8
|
|
0.76363636 0.81481481 0.83333333 0.83333333]
|
|
|
|
mean value: 0.8136026936026937
|
|
|
|
key: train_accuracy
|
|
value: [0.93292683 0.91869919 0.93089431 0.91666667 0.91666667 0.93089431
|
|
0.92886179 0.92697769 0.9127789 0.93914807]
|
|
|
|
mean value: 0.9254514421411962
|
|
|
|
key: test_fscore
|
|
value: [0.73170732 0.82608696 0.73076923 0.81632653 0.83333333 0.78431373
|
|
0.66666667 0.79166667 0.80851064 0.79069767]
|
|
|
|
mean value: 0.7780078739849725
|
|
|
|
key: train_fscore
|
|
value: [0.92124105 0.9047619 0.9178744 0.90024331 0.90167866 0.91747573
|
|
0.91803279 0.91428571 0.8973747 0.92753623]
|
|
|
|
mean value: 0.9120504479974278
|
|
|
|
key: test_precision
|
|
value: [0.83333333 0.82608696 0.65517241 0.76923077 0.8 0.71428571
|
|
0.8125 0.76 0.79166667 0.85 ]
|
|
|
|
mean value: 0.7812275853831326
|
|
|
|
key: train_precision
|
|
value: [0.91037736 0.89201878 0.9178744 0.90686275 0.8952381 0.92195122
|
|
0.89090909 0.90140845 0.88679245 0.92753623]
|
|
|
|
mean value: 0.9050968820144447
|
|
|
|
key: test_recall
|
|
value: [0.65217391 0.82608696 0.82608696 0.86956522 0.86956522 0.86956522
|
|
0.56521739 0.82608696 0.82608696 0.73913043]
|
|
|
|
mean value: 0.7869565217391304
|
|
|
|
key: train_recall
|
|
value: [0.93236715 0.9178744 0.9178744 0.89371981 0.90821256 0.91304348
|
|
0.9468599 0.92753623 0.90821256 0.92753623]
|
|
|
|
mean value: 0.9193236714975845
|
|
|
|
key: test_roc_auc
|
|
value: [0.77921196 0.85054348 0.75679348 0.84103261 0.85665761 0.80978261
|
|
0.7357337 0.81626928 0.83239832 0.82117812]
|
|
|
|
mean value: 0.8099601157082749
|
|
|
|
key: train_roc_auc
|
|
value: [0.93285024 0.91858632 0.92911264 0.91352657 0.91550979 0.92845156
|
|
0.93132469 0.92705483 0.91214824 0.93754434]
|
|
|
|
mean value: 0.9246109217505099
|
|
|
|
key: test_jcc
|
|
value: [0.57692308 0.7037037 0.57575758 0.68965517 0.71428571 0.64516129
|
|
0.5 0.65517241 0.67857143 0.65384615]
|
|
|
|
mean value: 0.639307652961713
|
|
|
|
key: train_jcc
|
|
value: [0.8539823 0.82608696 0.84821429 0.81858407 0.8209607 0.84753363
|
|
0.84848485 0.84210526 0.81385281 0.86486486]
|
|
|
|
mean value: 0.8384669735254815
|
|
|
|
MCC on Blind test: 0.45
|
|
|
|
Accuracy on Blind test: 0.73
|
|
|
|
Model_name: Gaussian NB
|
|
Model func: GaussianNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01469278 0.01110578 0.01064801 0.01035357 0.01014662 0.01015067
|
|
0.01030087 0.01057434 0.01018524 0.01026011]
|
|
|
|
mean value: 0.010841798782348634
|
|
|
|
key: score_time
|
|
value: [0.01228666 0.00966001 0.00951886 0.00936699 0.00913262 0.0093379
|
|
0.00910378 0.00910497 0.00907636 0.00917196]
|
|
|
|
mean value: 0.009576010704040527
|
|
|
|
key: test_mcc
|
|
value: [0.27863911 0.61131498 0.37855111 0.34721618 0.55163043 0.53758181
|
|
0.39590764 0.34195219 0.41195324 0.17664036]
|
|
|
|
mean value: 0.4031387068121861
|
|
|
|
key: train_mcc
|
|
value: [0.43665035 0.53827187 0.5702923 0.49863477 0.46849367 0.51713016
|
|
0.49341477 0.51754167 0.48030445 0.48526784]
|
|
|
|
mean value: 0.5006001832001062
|
|
|
|
key: test_accuracy
|
|
value: [0.65454545 0.8 0.69090909 0.67272727 0.78181818 0.76363636
|
|
0.70909091 0.62962963 0.7037037 0.59259259]
|
|
|
|
mean value: 0.6998653198653199
|
|
|
|
key: train_accuracy
|
|
value: [0.7296748 0.7703252 0.78658537 0.75203252 0.73373984 0.7601626
|
|
0.74796748 0.72616633 0.74239351 0.73833671]
|
|
|
|
mean value: 0.7487384356602187
|
|
|
|
key: test_fscore
|
|
value: [0.55813953 0.78431373 0.65306122 0.64 0.73913043 0.74509804
|
|
0.63636364 0.66666667 0.68 0.54166667]
|
|
|
|
mean value: 0.6644439928558977
|
|
|
|
key: train_fscore
|
|
value: [0.6395664 0.74141876 0.75862069 0.71759259 0.70561798 0.7293578
|
|
0.71689498 0.73887814 0.70804598 0.71772429]
|
|
|
|
mean value: 0.7173717604061177
|
|
|
|
key: test_precision
|
|
value: [0.6 0.71428571 0.61538462 0.59259259 0.73913043 0.67857143
|
|
0.66666667 0.54054054 0.62962963 0.52 ]
|
|
|
|
mean value: 0.6296801622453796
|
|
|
|
key: train_precision
|
|
value: [0.72839506 0.70434783 0.72368421 0.68888889 0.65966387 0.69432314
|
|
0.67965368 0.61612903 0.6754386 0.656 ]
|
|
|
|
mean value: 0.682652430528455
|
|
|
|
key: test_recall
|
|
value: [0.52173913 0.86956522 0.69565217 0.69565217 0.73913043 0.82608696
|
|
0.60869565 0.86956522 0.73913043 0.56521739]
|
|
|
|
mean value: 0.7130434782608696
|
|
|
|
key: train_recall
|
|
value: [0.57004831 0.7826087 0.79710145 0.74879227 0.75845411 0.76811594
|
|
0.75845411 0.92270531 0.74396135 0.79227053]
|
|
|
|
mean value: 0.7642512077294686
|
|
|
|
key: test_roc_auc
|
|
value: [0.63586957 0.80978261 0.69157609 0.67595109 0.77581522 0.77241848
|
|
0.69497283 0.66058906 0.70827489 0.58906031]
|
|
|
|
mean value: 0.7014310133239832
|
|
|
|
key: train_roc_auc
|
|
value: [0.70783117 0.7720061 0.78802441 0.75158912 0.73712179 0.76125095
|
|
0.74940249 0.7533107 0.74261005 0.74578562]
|
|
|
|
mean value: 0.7508932397376333
|
|
|
|
key: test_jcc
|
|
value: [0.38709677 0.64516129 0.48484848 0.47058824 0.5862069 0.59375
|
|
0.46666667 0.5 0.51515152 0.37142857]
|
|
|
|
mean value: 0.5020898434457208
|
|
|
|
key: train_jcc
|
|
value: [0.47011952 0.58909091 0.61111111 0.55956679 0.54513889 0.57400722
|
|
0.55871886 0.58588957 0.5480427 0.55972696]
|
|
|
|
mean value: 0.560141253706926
|
|
|
|
MCC on Blind test: 0.43
|
|
|
|
Accuracy on Blind test: 0.72
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.0105474 0.01046133 0.01060605 0.01042318 0.0104599 0.01057959
|
|
0.01052642 0.01053691 0.01061201 0.01110291]
|
|
|
|
mean value: 0.010585570335388183
|
|
|
|
key: score_time
|
|
value: [0.00915074 0.0091815 0.00931263 0.00919509 0.00912094 0.00941777
|
|
0.00916123 0.00909567 0.00914884 0.00942039]
|
|
|
|
mean value: 0.009220480918884277
|
|
|
|
key: test_mcc
|
|
value: [0.43639872 0.35136547 0.26246118 0.47690217 0.37855111 0.51757513
|
|
0.52002216 0.36008804 0.46984572 0.35286527]
|
|
|
|
mean value: 0.41260749687190534
|
|
|
|
key: train_mcc
|
|
value: [0.53522558 0.48022157 0.52596414 0.55027047 0.50963899 0.55595916
|
|
0.55027047 0.55363278 0.52533698 0.52804105]
|
|
|
|
mean value: 0.5314561192117819
|
|
|
|
key: test_accuracy
|
|
value: [0.72727273 0.69090909 0.63636364 0.74545455 0.69090909 0.76363636
|
|
0.76363636 0.68518519 0.74074074 0.68518519]
|
|
|
|
mean value: 0.712929292929293
|
|
|
|
key: train_accuracy
|
|
value: [0.77439024 0.74390244 0.76829268 0.7804878 0.75813008 0.78455285
|
|
0.7804878 0.78296146 0.76876268 0.77079108]
|
|
|
|
mean value: 0.7712759115420769
|
|
|
|
key: test_fscore
|
|
value: [0.66666667 0.58536585 0.58333333 0.69565217 0.65306122 0.72340426
|
|
0.64864865 0.63829787 0.69565217 0.62222222]
|
|
|
|
mean value: 0.6512304424504864
|
|
|
|
key: train_fscore
|
|
value: [0.72727273 0.70560748 0.72727273 0.74038462 0.72261072 0.73891626
|
|
0.74038462 0.73965937 0.72463768 0.72371638]
|
|
|
|
mean value: 0.7290462570692664
|
|
|
|
key: test_precision
|
|
value: [0.68181818 0.66666667 0.56 0.69565217 0.61538462 0.70833333
|
|
0.85714286 0.625 0.69565217 0.63636364]
|
|
|
|
mean value: 0.6742013638535378
|
|
|
|
key: train_precision
|
|
value: [0.74 0.68325792 0.72037915 0.73684211 0.6981982 0.75376884
|
|
0.73684211 0.74509804 0.72463768 0.73267327]
|
|
|
|
mean value: 0.7271697306118926
|
|
|
|
key: test_recall
|
|
value: [0.65217391 0.52173913 0.60869565 0.69565217 0.69565217 0.73913043
|
|
0.52173913 0.65217391 0.69565217 0.60869565]
|
|
|
|
mean value: 0.6391304347826087
|
|
|
|
key: train_recall
|
|
value: [0.71497585 0.7294686 0.73429952 0.74396135 0.74879227 0.72463768
|
|
0.74396135 0.73429952 0.72463768 0.71497585]
|
|
|
|
mean value: 0.7314009661835749
|
|
|
|
key: test_roc_auc
|
|
value: [0.71671196 0.66711957 0.63247283 0.73845109 0.69157609 0.76019022
|
|
0.72961957 0.68092567 0.73492286 0.67531557]
|
|
|
|
mean value: 0.7027305399719495
|
|
|
|
key: train_roc_auc
|
|
value: [0.76625985 0.74192728 0.76364099 0.77548945 0.75685228 0.77635393
|
|
0.77548945 0.77624067 0.76266849 0.76308233]
|
|
|
|
mean value: 0.7658004708233541
|
|
|
|
key: test_jcc
|
|
value: [0.5 0.4137931 0.41176471 0.53333333 0.48484848 0.56666667
|
|
0.48 0.46875 0.53333333 0.4516129 ]
|
|
|
|
mean value: 0.48441025307382535
|
|
|
|
key: train_jcc
|
|
value: [0.57142857 0.54512635 0.57142857 0.58778626 0.56569343 0.5859375
|
|
0.58778626 0.58687259 0.56818182 0.56704981]
|
|
|
|
mean value: 0.5737291159872184
|
|
|
|
MCC on Blind test: 0.29
|
|
|
|
Accuracy on Blind test: 0.66
|
|
|
|
Model_name: K-Nearest Neighbors
|
|
Model func: KNeighborsClassifier()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', KNeighborsClassifier())])
|
|
|
|
key: fit_time
|
|
value: [0.0096848 0.01012468 0.0108521 0.01071572 0.01056528 0.00939178
|
|
0.01068068 0.01059747 0.01128244 0.01058197]
|
|
|
|
mean value: 0.01044769287109375
|
|
|
|
key: score_time
|
|
value: [0.07402992 0.01488328 0.01613545 0.01658916 0.0157609 0.01435757
|
|
0.01600146 0.01821113 0.01360273 0.0135684 ]
|
|
|
|
mean value: 0.021314001083374022
|
|
|
|
key: test_mcc
|
|
value: [0.15082668 0.26246118 0.30472022 0.13543408 0.39181209 0.50741958
|
|
0.26855929 0.31155357 0.46984572 0.07116075]
|
|
|
|
mean value: 0.2873793170233383
|
|
|
|
key: train_mcc
|
|
value: [0.56875384 0.53684571 0.56134034 0.57605667 0.55427498 0.55955672
|
|
0.5401765 0.63143761 0.527033 0.56456169]
|
|
|
|
mean value: 0.5620037067821323
|
|
|
|
key: test_accuracy
|
|
value: [0.6 0.63636364 0.65454545 0.58181818 0.70909091 0.74545455
|
|
0.65454545 0.66666667 0.74074074 0.55555556]
|
|
|
|
mean value: 0.6544781144781144
|
|
|
|
key: train_accuracy
|
|
value: [0.79065041 0.77439024 0.78658537 0.79471545 0.78455285 0.78658537
|
|
0.77642276 0.82150101 0.77079108 0.78904665]
|
|
|
|
mean value: 0.7875241181417899
|
|
|
|
key: test_fscore
|
|
value: [0.45 0.58333333 0.6122449 0.48888889 0.61904762 0.73076923
|
|
0.51282051 0.59090909 0.69565217 0.42857143]
|
|
|
|
mean value: 0.5712237176212331
|
|
|
|
key: train_fscore
|
|
value: [0.74692875 0.73123487 0.74452555 0.74812968 0.73232323 0.73945409
|
|
0.73170732 0.78109453 0.72098765 0.74257426]
|
|
|
|
mean value: 0.7418959919811685
|
|
|
|
key: test_precision
|
|
value: [0.52941176 0.56 0.57692308 0.5 0.68421053 0.65517241
|
|
0.625 0.61904762 0.69565217 0.47368421]
|
|
|
|
mean value: 0.5919101785224831
|
|
|
|
key: train_precision
|
|
value: [0.76 0.73300971 0.75 0.77319588 0.76719577 0.76020408
|
|
0.73891626 0.80512821 0.73737374 0.76142132]
|
|
|
|
mean value: 0.7586444952311476
|
|
|
|
key: test_recall
|
|
value: [0.39130435 0.60869565 0.65217391 0.47826087 0.56521739 0.82608696
|
|
0.43478261 0.56521739 0.69565217 0.39130435]
|
|
|
|
mean value: 0.5608695652173913
|
|
|
|
key: train_recall
|
|
value: [0.73429952 0.7294686 0.73913043 0.72463768 0.70048309 0.71980676
|
|
0.72463768 0.75845411 0.70531401 0.72463768]
|
|
|
|
mean value: 0.7260869565217392
|
|
|
|
key: test_roc_auc
|
|
value: [0.57065217 0.63247283 0.65421196 0.56725543 0.6888587 0.75679348
|
|
0.6236413 0.65357644 0.73492286 0.53436185]
|
|
|
|
mean value: 0.6416747019635344
|
|
|
|
key: train_roc_auc
|
|
value: [0.78293923 0.76824307 0.78009153 0.78512586 0.77304856 0.77744724
|
|
0.76933638 0.81279349 0.76174791 0.78015101]
|
|
|
|
mean value: 0.7790924293098207
|
|
|
|
key: test_jcc
|
|
value: [0.29032258 0.41176471 0.44117647 0.32352941 0.44827586 0.57575758
|
|
0.34482759 0.41935484 0.53333333 0.27272727]
|
|
|
|
mean value: 0.4061069637684177
|
|
|
|
key: train_jcc
|
|
value: [0.59607843 0.57633588 0.59302326 0.59760956 0.57768924 0.58661417
|
|
0.57692308 0.64081633 0.56370656 0.59055118]
|
|
|
|
mean value: 0.5899347691320936
|
|
|
|
MCC on Blind test: 0.24
|
|
|
|
Accuracy on Blind test: 0.64
|
|
|
|
Model_name: SVM
|
|
Model func: SVC(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SVC(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02656698 0.02517915 0.02399397 0.02391505 0.02421665 0.02414322
|
|
0.02453303 0.02408743 0.02439761 0.0245142 ]
|
|
|
|
mean value: 0.024554729461669922
|
|
|
|
key: score_time
|
|
value: [0.01422763 0.01268554 0.0124557 0.01299238 0.01262712 0.01349807
|
|
0.0126636 0.01236033 0.01320577 0.012743 ]
|
|
|
|
mean value: 0.012945914268493652
|
|
|
|
key: test_mcc
|
|
value: [0.4299228 0.50848012 0.5262129 0.43189061 0.54764925 0.49468252
|
|
0.46046933 0.30312793 0.54201786 0.58258986]
|
|
|
|
mean value: 0.4827043155774927
|
|
|
|
key: train_mcc
|
|
value: [0.62618197 0.61352703 0.63465505 0.56215877 0.57498214 0.6140883
|
|
0.57937741 0.63511179 0.60989754 0.64470036]
|
|
|
|
mean value: 0.6094680362515074
|
|
|
|
key: test_accuracy
|
|
value: [0.72727273 0.76363636 0.76363636 0.72727273 0.78181818 0.74545455
|
|
0.72727273 0.66666667 0.77777778 0.7962963 ]
|
|
|
|
mean value: 0.7477104377104378
|
|
|
|
key: train_accuracy
|
|
value: [0.81910569 0.81300813 0.82317073 0.78861789 0.79471545 0.81300813
|
|
0.79674797 0.82352941 0.81135903 0.82758621]
|
|
|
|
mean value: 0.8110848628770263
|
|
|
|
key: test_fscore
|
|
value: [0.63414634 0.68292683 0.73469388 0.65116279 0.72727273 0.72
|
|
0.54545455 0.55 0.72727273 0.73170732]
|
|
|
|
mean value: 0.6704637156053572
|
|
|
|
key: train_fscore
|
|
value: [0.77468354 0.76767677 0.77974684 0.72916667 0.73901809 0.77114428
|
|
0.74489796 0.77862595 0.76574307 0.79115479]
|
|
|
|
mean value: 0.7641857958137329
|
|
|
|
key: test_precision
|
|
value: [0.72222222 0.77777778 0.69230769 0.7 0.76190476 0.66666667
|
|
0.9 0.64705882 0.76190476 0.83333333]
|
|
|
|
mean value: 0.7463176039646627
|
|
|
|
key: train_precision
|
|
value: [0.81382979 0.8042328 0.81914894 0.79096045 0.79444444 0.79487179
|
|
0.78918919 0.82258065 0.8 0.805 ]
|
|
|
|
mean value: 0.8034258053281179
|
|
|
|
key: test_recall
|
|
value: [0.56521739 0.60869565 0.7826087 0.60869565 0.69565217 0.7826087
|
|
0.39130435 0.47826087 0.69565217 0.65217391]
|
|
|
|
mean value: 0.6260869565217392
|
|
|
|
key: train_recall
|
|
value: [0.73913043 0.73429952 0.74396135 0.6763285 0.69082126 0.74879227
|
|
0.70531401 0.73913043 0.73429952 0.77777778]
|
|
|
|
mean value: 0.7289855072463768
|
|
|
|
key: test_roc_auc
|
|
value: [0.7044837 0.74184783 0.76630435 0.71059783 0.76970109 0.75067935
|
|
0.68002717 0.64235624 0.76718093 0.77769986]
|
|
|
|
mean value: 0.7310878330995793
|
|
|
|
key: train_roc_auc
|
|
value: [0.80816171 0.80223748 0.81233155 0.77325197 0.78049835 0.8042207
|
|
0.78423595 0.81187291 0.80071619 0.82070707]
|
|
|
|
mean value: 0.7998233879011911
|
|
|
|
key: test_jcc
|
|
value: [0.46428571 0.51851852 0.58064516 0.48275862 0.57142857 0.5625
|
|
0.375 0.37931034 0.57142857 0.57692308]
|
|
|
|
mean value: 0.5082798579392016
|
|
|
|
key: train_jcc
|
|
value: [0.6322314 0.62295082 0.63900415 0.57377049 0.58606557 0.62753036
|
|
0.59349593 0.6375 0.62040816 0.65447154]
|
|
|
|
mean value: 0.6187428446894745
|
|
|
|
MCC on Blind test: 0.29
|
|
|
|
Accuracy on Blind test: 0.66
|
|
|
|
Model_name: MLP
|
|
Model func: MLPClassifier(max_iter=500, random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MLPClassifier(max_iter=500, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.87738419 1.86355472 1.93836021 1.95548415 2.00255823 1.92464495
|
|
1.87634706 1.90383768 1.97237706 2.0040803 ]
|
|
|
|
mean value: 1.9318628549575805
|
|
|
|
key: score_time
|
|
value: [0.01933503 0.01476908 0.01467705 0.0127182 0.01512384 0.01787472
|
|
0.01688218 0.01641417 0.02369785 0.01700139]
|
|
|
|
mean value: 0.016849350929260255
|
|
|
|
key: test_mcc
|
|
value: [0.2859164 0.51757513 0.40378643 0.50741958 0.59190054 0.60004379
|
|
0.39594831 0.45526408 0.66901612 0.50265363]
|
|
|
|
mean value: 0.49295240047741534
|
|
|
|
key: train_mcc
|
|
value: [0.96305083 0.93934297 0.95474605 0.95876404 0.96664124 0.97538269
|
|
0.97094453 0.98335709 0.96267627 0.98335709]
|
|
|
|
mean value: 0.9658262804528608
|
|
|
|
key: test_accuracy
|
|
value: [0.65454545 0.76363636 0.69090909 0.74545455 0.8 0.8
|
|
0.70909091 0.72222222 0.83333333 0.75925926]
|
|
|
|
mean value: 0.7478451178451179
|
|
|
|
key: train_accuracy
|
|
value: [0.98170732 0.9695122 0.97764228 0.9796748 0.98373984 0.98780488
|
|
0.98577236 0.99188641 0.98174442 0.99188641]
|
|
|
|
mean value: 0.9831370899915896
|
|
|
|
key: test_fscore
|
|
value: [0.57777778 0.72340426 0.67924528 0.73076923 0.76595745 0.7755102
|
|
0.55555556 0.70588235 0.81632653 0.69767442]
|
|
|
|
mean value: 0.7028103055488797
|
|
|
|
key: train_fscore
|
|
value: [0.97862233 0.96487119 0.97387173 0.97619048 0.98067633 0.98571429
|
|
0.98321343 0.99029126 0.97841727 0.99029126]
|
|
|
|
mean value: 0.9802159566259778
|
|
|
|
key: test_precision
|
|
value: [0.59090909 0.70833333 0.6 0.65517241 0.75 0.73076923
|
|
0.76923077 0.64285714 0.76923077 0.75 ]
|
|
|
|
mean value: 0.696650275012344
|
|
|
|
key: train_precision
|
|
value: [0.96261682 0.93636364 0.95794393 0.96244131 0.98067633 0.97183099
|
|
0.97619048 0.99512195 0.97142857 0.99512195]
|
|
|
|
mean value: 0.9709735963057159
|
|
|
|
key: test_recall
|
|
value: [0.56521739 0.73913043 0.7826087 0.82608696 0.7826087 0.82608696
|
|
0.43478261 0.7826087 0.86956522 0.65217391]
|
|
|
|
mean value: 0.7260869565217392
|
|
|
|
key: train_recall
|
|
value: [0.99516908 0.99516908 0.99033816 0.99033816 0.98067633 1.
|
|
0.99033816 0.98550725 0.98550725 0.98550725]
|
|
|
|
mean value: 0.9898550724637681
|
|
|
|
key: test_roc_auc
|
|
value: [0.6419837 0.76019022 0.70380435 0.75679348 0.79755435 0.80366848
|
|
0.6705163 0.73001403 0.83800842 0.7454418 ]
|
|
|
|
mean value: 0.7447975105189341
|
|
|
|
key: train_roc_auc
|
|
value: [0.98354945 0.97302314 0.97937961 0.98113399 0.98332062 0.98947368
|
|
0.98639715 0.99100537 0.98226411 0.99100537]
|
|
|
|
mean value: 0.9840552506227563
|
|
|
|
key: test_jcc
|
|
value: [0.40625 0.56666667 0.51428571 0.57575758 0.62068966 0.63333333
|
|
0.38461538 0.54545455 0.68965517 0.53571429]
|
|
|
|
mean value: 0.5472422333413712
|
|
|
|
key: train_jcc
|
|
value: [0.95813953 0.9321267 0.94907407 0.95348837 0.96208531 0.97183099
|
|
0.96698113 0.98076923 0.95774648 0.98076923]
|
|
|
|
mean value: 0.9613011044342935
|
|
|
|
MCC on Blind test: 0.41
|
|
|
|
Accuracy on Blind test: 0.71
|
|
|
|
Model_name: Decision Tree
|
|
Model func: DecisionTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', DecisionTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.04909205 0.02950025 0.02932739 0.02902603 0.02834392 0.02826023
|
|
0.02964878 0.03143978 0.0290525 0.02814364]
|
|
|
|
mean value: 0.031183457374572753
|
|
|
|
key: score_time
|
|
value: [0.00961804 0.00957942 0.00933838 0.01010132 0.00951147 0.00917363
|
|
0.00951052 0.00927472 0.00931883 0.00968361]
|
|
|
|
mean value: 0.009510993957519531
|
|
|
|
key: test_mcc
|
|
value: [0.62352005 0.82153646 0.70108696 0.75878131 0.78065376 0.70662625
|
|
0.66559476 0.58152196 0.552175 0.741478 ]
|
|
|
|
mean value: 0.6932974500375436
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.81818182 0.90909091 0.85454545 0.87272727 0.89090909 0.85454545
|
|
0.83636364 0.7962963 0.77777778 0.87037037]
|
|
|
|
mean value: 0.8480808080808081
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.77272727 0.89795918 0.82608696 0.8627451 0.875 0.83333333
|
|
0.7804878 0.75555556 0.75 0.82926829]
|
|
|
|
mean value: 0.8183163497411561
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.80952381 0.84615385 0.82608696 0.78571429 0.84 0.8
|
|
0.88888889 0.77272727 0.72 0.94444444]
|
|
|
|
mean value: 0.8233539503974286
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.73913043 0.95652174 0.82608696 0.95652174 0.91304348 0.86956522
|
|
0.69565217 0.73913043 0.7826087 0.73913043]
|
|
|
|
mean value: 0.8217391304347826
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.80706522 0.91576087 0.85054348 0.88451087 0.89402174 0.85665761
|
|
0.81657609 0.78892006 0.77840112 0.85343619]
|
|
|
|
mean value: 0.8445893232819074
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.62962963 0.81481481 0.7037037 0.75862069 0.77777778 0.71428571
|
|
0.64 0.60714286 0.6 0.70833333]
|
|
|
|
mean value: 0.6954308520343003
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.61
|
|
|
|
Accuracy on Blind test: 0.81
|
|
|
|
Model_name: Extra Trees
|
|
Model func: ExtraTreesClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreesClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.14270854 0.14585114 0.13640237 0.13542676 0.1369288 0.13830209
|
|
0.13576722 0.13378549 0.13537693 0.1409452 ]
|
|
|
|
mean value: 0.13814945220947267
|
|
|
|
key: score_time
|
|
value: [0.01938272 0.01929069 0.01900387 0.01840878 0.01852822 0.01847625
|
|
0.01848555 0.01832962 0.01863289 0.01892066]
|
|
|
|
mean value: 0.018745923042297365
|
|
|
|
key: test_mcc
|
|
value: [0.58703744 0.50851637 0.44324972 0.56841568 0.70108696 0.56841568
|
|
0.39125402 0.42510136 0.51911209 0.62566799]
|
|
|
|
mean value: 0.5337857310299543
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.8 0.76363636 0.72727273 0.78181818 0.85454545 0.78181818
|
|
0.70909091 0.72222222 0.75925926 0.81481481]
|
|
|
|
mean value: 0.7714478114478115
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.73170732 0.69767442 0.68085106 0.76 0.82608696 0.76
|
|
0.57894737 0.65116279 0.73469388 0.75 ]
|
|
|
|
mean value: 0.7171123792699096
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.83333333 0.75 0.66666667 0.7037037 0.82608696 0.7037037
|
|
0.73333333 0.7 0.69230769 0.88235294]
|
|
|
|
mean value: 0.7491488330746643
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.65217391 0.65217391 0.69565217 0.82608696 0.82608696 0.82608696
|
|
0.47826087 0.60869565 0.7826087 0.65217391]
|
|
|
|
mean value: 0.7
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.77921196 0.74796196 0.72282609 0.78804348 0.85054348 0.78804348
|
|
0.67663043 0.70757363 0.76227209 0.79382889]
|
|
|
|
mean value: 0.7616935483870968
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.57692308 0.53571429 0.51612903 0.61290323 0.7037037 0.61290323
|
|
0.40740741 0.48275862 0.58064516 0.6 ]
|
|
|
|
mean value: 0.5629087739599419
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.44
|
|
|
|
Accuracy on Blind test: 0.73
|
|
|
|
Model_name: Extra Tree
|
|
Model func: ExtraTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.0106895 0.0107975 0.01070595 0.01074672 0.01078081 0.01070833
|
|
0.0135324 0.01077175 0.01183271 0.01100755]
|
|
|
|
mean value: 0.01115732192993164
|
|
|
|
key: score_time
|
|
value: [0.00930071 0.00926113 0.00914121 0.00889707 0.0094676 0.00902438
|
|
0.00900888 0.00885224 0.00996494 0.00909019]
|
|
|
|
mean value: 0.009200835227966308
|
|
|
|
key: test_mcc
|
|
value: [0.39590764 0.21105878 0.30472022 0.25271739 0.27280815 0.51163988
|
|
0.09242443 0.31155357 0.27664637 0.08108929]
|
|
|
|
mean value: 0.27105657192462734
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.70909091 0.58181818 0.65454545 0.63636364 0.65454545 0.76363636
|
|
0.56363636 0.66666667 0.64814815 0.55555556]
|
|
|
|
mean value: 0.6434006734006734
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.63636364 0.59649123 0.6122449 0.56521739 0.53658537 0.71111111
|
|
0.45454545 0.59090909 0.57777778 0.45454545]
|
|
|
|
mean value: 0.5735791408439891
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.66666667 0.5 0.57692308 0.56521739 0.61111111 0.72727273
|
|
0.47619048 0.61904762 0.59090909 0.47619048]
|
|
|
|
mean value: 0.5809528635615592
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.60869565 0.73913043 0.65217391 0.56521739 0.47826087 0.69565217
|
|
0.43478261 0.56521739 0.56521739 0.43478261]
|
|
|
|
mean value: 0.5739130434782609
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.69497283 0.60394022 0.65421196 0.6263587 0.62975543 0.75407609
|
|
0.5455163 0.65357644 0.63744741 0.53997195]
|
|
|
|
mean value: 0.6339827314165498
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.46666667 0.425 0.44117647 0.39393939 0.36666667 0.55172414
|
|
0.29411765 0.41935484 0.40625 0.29411765]
|
|
|
|
mean value: 0.40590134686193213
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.22
|
|
|
|
Accuracy on Blind test: 0.62
|
|
|
|
Model_name: Random Forest
|
|
Model func: RandomForestClassifier(n_estimators=1000, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(n_estimators=1000, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [2.00051999 1.9661603 1.94647527 1.96586847 1.95577812 1.93665648
|
|
1.92854357 1.96198702 1.95868993 1.9296658 ]
|
|
|
|
mean value: 1.9550344944000244
|
|
|
|
key: score_time
|
|
value: [0.09752798 0.09560466 0.0961225 0.09526682 0.09944749 0.09337711
|
|
0.09278417 0.10019994 0.09292531 0.09270859]
|
|
|
|
mean value: 0.09559645652770996
|
|
|
|
key: test_mcc
|
|
value: [0.70187922 0.88784567 0.74055136 0.68504815 0.75878131 0.75878131
|
|
0.70187922 0.5802059 0.77749578 0.78693802]
|
|
|
|
mean value: 0.7379405937626433
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.85454545 0.94545455 0.87272727 0.83636364 0.87272727 0.87272727
|
|
0.85454545 0.7962963 0.88888889 0.88888889]
|
|
|
|
mean value: 0.8683164983164983
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.80952381 0.93333333 0.85106383 0.82352941 0.8627451 0.8627451
|
|
0.80952381 0.74418605 0.875 0.85 ]
|
|
|
|
mean value: 0.8421650436522952
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.89473684 0.95454545 0.83333333 0.75 0.78571429 0.78571429
|
|
0.89473684 0.8 0.84 1. ]
|
|
|
|
mean value: 0.8538781043517886
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.73913043 0.91304348 0.86956522 0.91304348 0.95652174 0.95652174
|
|
0.73913043 0.69565217 0.91304348 0.73913043]
|
|
|
|
mean value: 0.8434782608695652
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.83831522 0.94089674 0.87228261 0.84714674 0.88451087 0.88451087
|
|
0.83831522 0.78330996 0.89200561 0.86956522]
|
|
|
|
mean value: 0.865085904628331
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.68 0.875 0.74074074 0.7 0.75862069 0.75862069
|
|
0.68 0.59259259 0.77777778 0.73913043]
|
|
|
|
mean value: 0.7302482925204065
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.64
|
|
|
|
Accuracy on Blind test: 0.83
|
|
|
|
Model_name: Random Forest2
|
|
Model func: RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000...05', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10,
|
|
oob_score=True, random_state=42))])
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
|
|
key: fit_time
|
|
value: [1.84873724 0.99962497 1.07342482 1.02383947 1.04548478 1.01116562
|
|
1.02130485 0.99912429 1.02942729 1.06653047]
|
|
|
|
mean value: 1.1118663787841796
|
|
|
|
key: score_time
|
|
value: [0.21156573 0.26648188 0.28589416 0.15180969 0.2804842 0.25872636
|
|
0.24671173 0.21345377 0.25705051 0.28484893]
|
|
|
|
mean value: 0.24570269584655763
|
|
|
|
key: test_mcc
|
|
value: [0.66559476 0.88920218 0.78065376 0.70662625 0.75878131 0.74770557
|
|
0.73839363 0.65775818 0.81229162 0.82092207]
|
|
|
|
mean value: 0.7577929322746051
|
|
|
|
key: train_mcc
|
|
value: [0.92932537 0.92916753 0.94173192 0.93358762 0.92552675 0.94190647
|
|
0.92507398 0.9418183 0.91734185 0.92563864]
|
|
|
|
mean value: 0.9311118449851191
|
|
|
|
key: test_accuracy
|
|
value: [0.83636364 0.94545455 0.89090909 0.85454545 0.87272727 0.87272727
|
|
0.87272727 0.83333333 0.90740741 0.90740741]
|
|
|
|
mean value: 0.8793602693602693
|
|
|
|
key: train_accuracy
|
|
value: [0.96544715 0.96544715 0.97154472 0.96747967 0.96341463 0.97154472
|
|
0.96341463 0.97160243 0.95943205 0.96348884]
|
|
|
|
mean value: 0.9662816009498837
|
|
|
|
key: test_fscore
|
|
value: [0.7804878 0.93617021 0.875 0.83333333 0.8627451 0.85714286
|
|
0.8372093 0.79069767 0.89361702 0.87804878]
|
|
|
|
mean value: 0.8544452084667999
|
|
|
|
key: train_fscore
|
|
value: [0.95923261 0.95903614 0.96634615 0.96172249 0.95714286 0.96650718
|
|
0.95673077 0.96634615 0.95238095 0.95714286]
|
|
|
|
mean value: 0.96025881671487
|
|
|
|
key: test_precision
|
|
value: [0.88888889 0.91666667 0.84 0.8 0.78571429 0.80769231
|
|
0.9 0.85 0.875 1. ]
|
|
|
|
mean value: 0.8663962148962149
|
|
|
|
key: train_precision
|
|
value: [0.95238095 0.95673077 0.96172249 0.95260664 0.94366197 0.95734597
|
|
0.95215311 0.96172249 0.93896714 0.94366197]
|
|
|
|
mean value: 0.9520953494183401
|
|
|
|
key: test_recall
|
|
value: [0.69565217 0.95652174 0.91304348 0.86956522 0.95652174 0.91304348
|
|
0.7826087 0.73913043 0.91304348 0.7826087 ]
|
|
|
|
mean value: 0.8521739130434782
|
|
|
|
key: train_recall
|
|
value: [0.96618357 0.96135266 0.97101449 0.97101449 0.97101449 0.97584541
|
|
0.96135266 0.97101449 0.96618357 0.97101449]
|
|
|
|
mean value: 0.9685990338164251
|
|
|
|
key: test_roc_auc
|
|
value: [0.81657609 0.94701087 0.89402174 0.85665761 0.88451087 0.87839674
|
|
0.86005435 0.82117812 0.90813464 0.89130435]
|
|
|
|
mean value: 0.8757845371669004
|
|
|
|
key: train_roc_auc
|
|
value: [0.96554793 0.96488685 0.97147216 0.96796339 0.96445461 0.97213323
|
|
0.96313247 0.97152123 0.96036451 0.96452823]
|
|
|
|
mean value: 0.9666004615775782
|
|
|
|
key: test_jcc
|
|
value: [0.64 0.88 0.77777778 0.71428571 0.75862069 0.75
|
|
0.72 0.65384615 0.80769231 0.7826087 ]
|
|
|
|
mean value: 0.74848313389093
|
|
|
|
key: train_jcc
|
|
value: [0.92165899 0.9212963 0.93488372 0.92626728 0.91780822 0.93518519
|
|
0.91705069 0.93488372 0.90909091 0.91780822]
|
|
|
|
mean value: 0.9235933229314366
|
|
|
|
MCC on Blind test: 0.69
|
|
|
|
Accuracy on Blind test: 0.85
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01144552 0.01041913 0.01031971 0.01029325 0.01053071 0.01064229
|
|
0.01054692 0.01049185 0.01066327 0.01136136]
|
|
|
|
mean value: 0.010671401023864746
|
|
|
|
key: score_time
|
|
value: [0.00913405 0.00913501 0.00921345 0.0090847 0.00909209 0.00911283
|
|
0.00987315 0.00917554 0.00909829 0.00913 ]
|
|
|
|
mean value: 0.009204912185668945
|
|
|
|
key: test_mcc
|
|
value: [0.43639872 0.35136547 0.26246118 0.47690217 0.37855111 0.51757513
|
|
0.52002216 0.36008804 0.46984572 0.35286527]
|
|
|
|
mean value: 0.41260749687190534
|
|
|
|
key: train_mcc
|
|
value: [0.53522558 0.48022157 0.52596414 0.55027047 0.50963899 0.55595916
|
|
0.55027047 0.55363278 0.52533698 0.52804105]
|
|
|
|
mean value: 0.5314561192117819
|
|
|
|
key: test_accuracy
|
|
value: [0.72727273 0.69090909 0.63636364 0.74545455 0.69090909 0.76363636
|
|
0.76363636 0.68518519 0.74074074 0.68518519]
|
|
|
|
mean value: 0.712929292929293
|
|
|
|
key: train_accuracy
|
|
value: [0.77439024 0.74390244 0.76829268 0.7804878 0.75813008 0.78455285
|
|
0.7804878 0.78296146 0.76876268 0.77079108]
|
|
|
|
mean value: 0.7712759115420769
|
|
|
|
key: test_fscore
|
|
value: [0.66666667 0.58536585 0.58333333 0.69565217 0.65306122 0.72340426
|
|
0.64864865 0.63829787 0.69565217 0.62222222]
|
|
|
|
mean value: 0.6512304424504864
|
|
|
|
key: train_fscore
|
|
value: [0.72727273 0.70560748 0.72727273 0.74038462 0.72261072 0.73891626
|
|
0.74038462 0.73965937 0.72463768 0.72371638]
|
|
|
|
mean value: 0.7290462570692664
|
|
|
|
key: test_precision
|
|
value: [0.68181818 0.66666667 0.56 0.69565217 0.61538462 0.70833333
|
|
0.85714286 0.625 0.69565217 0.63636364]
|
|
|
|
mean value: 0.6742013638535378
|
|
|
|
key: train_precision
|
|
value: [0.74 0.68325792 0.72037915 0.73684211 0.6981982 0.75376884
|
|
0.73684211 0.74509804 0.72463768 0.73267327]
|
|
|
|
mean value: 0.7271697306118926
|
|
|
|
key: test_recall
|
|
value: [0.65217391 0.52173913 0.60869565 0.69565217 0.69565217 0.73913043
|
|
0.52173913 0.65217391 0.69565217 0.60869565]
|
|
|
|
mean value: 0.6391304347826087
|
|
|
|
key: train_recall
|
|
value: [0.71497585 0.7294686 0.73429952 0.74396135 0.74879227 0.72463768
|
|
0.74396135 0.73429952 0.72463768 0.71497585]
|
|
|
|
mean value: 0.7314009661835749
|
|
|
|
key: test_roc_auc
|
|
value: [0.71671196 0.66711957 0.63247283 0.73845109 0.69157609 0.76019022
|
|
0.72961957 0.68092567 0.73492286 0.67531557]
|
|
|
|
mean value: 0.7027305399719495
|
|
|
|
key: train_roc_auc
|
|
value: [0.76625985 0.74192728 0.76364099 0.77548945 0.75685228 0.77635393
|
|
0.77548945 0.77624067 0.76266849 0.76308233]
|
|
|
|
mean value: 0.7658004708233541
|
|
|
|
key: test_jcc
|
|
value: [0.5 0.4137931 0.41176471 0.53333333 0.48484848 0.56666667
|
|
0.48 0.46875 0.53333333 0.4516129 ]
|
|
|
|
mean value: 0.48441025307382535
|
|
|
|
key: train_jcc
|
|
value: [0.57142857 0.54512635 0.57142857 0.58778626 0.56569343 0.5859375
|
|
0.58778626 0.58687259 0.56818182 0.56704981]
|
|
|
|
mean value: 0.5737291159872184
|
|
|
|
MCC on Blind test: 0.29
|
|
|
|
Accuracy on Blind test: 0.66
|
|
|
|
Model_name: XGBoost
|
|
Model func: XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000...
|
|
interaction_constraints=None, learning_rate=None,
|
|
max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan,
|
|
monotone_constraints=None, n_estimators=100,
|
|
n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None,
|
|
reg_lambda=None, scale_pos_weight=None,
|
|
subsample=None, tree_method=None,
|
|
use_label_encoder=False,
|
|
validate_parameters=None, verbosity=0))])
|
|
|
|
key: fit_time
|
|
value: [0.12921071 0.09907699 0.09061193 0.08828115 0.08906078 0.08684015
|
|
0.08911514 0.09572601 0.08669972 0.08724833]
|
|
|
|
mean value: 0.09418709278106689
|
|
|
|
key: score_time
|
|
value: [0.01232672 0.0113709 0.01125932 0.01129842 0.01127052 0.01127601
|
|
0.01118422 0.01114106 0.01104665 0.01139307]
|
|
|
|
mean value: 0.011356687545776368
|
|
|
|
key: test_mcc
|
|
value: [0.70187922 0.89536735 0.82153646 0.82153646 0.78961518 0.70662625
|
|
0.85054348 0.73395976 0.81229162 0.85538121]
|
|
|
|
mean value: 0.7988736984470703
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.85454545 0.94545455 0.90909091 0.90909091 0.89090909 0.85454545
|
|
0.92727273 0.87037037 0.90740741 0.92592593]
|
|
|
|
mean value: 0.8994612794612794
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.80952381 0.93877551 0.89795918 0.89795918 0.88 0.83333333
|
|
0.91304348 0.84444444 0.89361702 0.9047619 ]
|
|
|
|
mean value: 0.8813417869151978
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.89473684 0.88461538 0.84615385 0.84615385 0.81481481 0.8
|
|
0.91304348 0.86363636 0.875 1. ]
|
|
|
|
mean value: 0.8738154575740388
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.73913043 1. 0.95652174 0.95652174 0.95652174 0.86956522
|
|
0.91304348 0.82608696 0.91304348 0.82608696]
|
|
|
|
mean value: 0.8956521739130434
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.83831522 0.953125 0.91576087 0.91576087 0.90013587 0.85665761
|
|
0.92527174 0.86465638 0.90813464 0.91304348]
|
|
|
|
mean value: 0.899086167601683
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.68 0.88461538 0.81481481 0.81481481 0.78571429 0.71428571
|
|
0.84 0.73076923 0.80769231 0.82608696]
|
|
|
|
mean value: 0.7898793509228291
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.67
|
|
|
|
Accuracy on Blind test: 0.84
|
|
|
|
Model_name: LDA
|
|
Model func: LinearDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LinearDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.05119705 0.08430624 0.08820415 0.07830334 0.07586288 0.06692553
|
|
0.0715065 0.08402658 0.04372358 0.05681276]
|
|
|
|
mean value: 0.07008686065673828
|
|
|
|
key: score_time
|
|
value: [0.01247501 0.02614236 0.02369547 0.01927376 0.02228785 0.0220902
|
|
0.0245657 0.02134919 0.01254559 0.01263094]
|
|
|
|
mean value: 0.019705605506896973
|
|
|
|
key: test_mcc
|
|
value: [0.66176788 0.49468252 0.48454371 0.55857122 0.56841568 0.62586896
|
|
0.4299228 0.5744289 0.62131837 0.65774086]
|
|
|
|
mean value: 0.5677260905292308
|
|
|
|
key: train_mcc
|
|
value: [0.8096421 0.81315076 0.82082225 0.80846845 0.80013948 0.80241214
|
|
0.83501834 0.80555438 0.79682809 0.80948363]
|
|
|
|
mean value: 0.8101519608539156
|
|
|
|
key: test_accuracy
|
|
value: [0.83636364 0.74545455 0.74545455 0.78181818 0.78181818 0.8
|
|
0.72727273 0.77777778 0.81481481 0.83333333]
|
|
|
|
mean value: 0.7844107744107744
|
|
|
|
key: train_accuracy
|
|
value: [0.90650407 0.90853659 0.91260163 0.90650407 0.90243902 0.90243902
|
|
0.91869919 0.90466531 0.90060852 0.90669371]
|
|
|
|
mean value: 0.9069691122874718
|
|
|
|
key: test_fscore
|
|
value: [0.79069767 0.72 0.70833333 0.75 0.76 0.79245283
|
|
0.63414634 0.76923077 0.7826087 0.8 ]
|
|
|
|
mean value: 0.7507469644286975
|
|
|
|
key: train_fscore
|
|
value: [0.89099526 0.89260143 0.89638554 0.88942308 0.88461538 0.88732394
|
|
0.90566038 0.88836105 0.88305489 0.89047619]
|
|
|
|
mean value: 0.8908897145580277
|
|
|
|
key: test_precision
|
|
value: [0.85 0.66666667 0.68 0.72 0.7037037 0.7
|
|
0.72222222 0.68965517 0.7826087 0.81818182]
|
|
|
|
mean value: 0.7333038278840378
|
|
|
|
key: train_precision
|
|
value: [0.8744186 0.88207547 0.89423077 0.88516746 0.88038278 0.8630137
|
|
0.88479263 0.87383178 0.87264151 0.87793427]
|
|
|
|
mean value: 0.8788488967608109
|
|
|
|
key: test_recall
|
|
value: [0.73913043 0.7826087 0.73913043 0.7826087 0.82608696 0.91304348
|
|
0.56521739 0.86956522 0.7826087 0.7826087 ]
|
|
|
|
mean value: 0.7782608695652173
|
|
|
|
key: train_recall
|
|
value: [0.90821256 0.90338164 0.89855072 0.89371981 0.88888889 0.91304348
|
|
0.92753623 0.90338164 0.89371981 0.90338164]
|
|
|
|
mean value: 0.9033816425120773
|
|
|
|
key: test_roc_auc
|
|
value: [0.82269022 0.75067935 0.74456522 0.78192935 0.78804348 0.81589674
|
|
0.7044837 0.78962132 0.81065919 0.82678822]
|
|
|
|
mean value: 0.7835356767180925
|
|
|
|
key: train_roc_auc
|
|
value: [0.90673786 0.90783117 0.91067887 0.90475464 0.9005848 0.90389016
|
|
0.91990847 0.90448802 0.89965711 0.90623628]
|
|
|
|
mean value: 0.9064767370945861
|
|
|
|
key: test_jcc
|
|
value: [0.65384615 0.5625 0.5483871 0.6 0.61290323 0.65625
|
|
0.46428571 0.625 0.64285714 0.66666667]
|
|
|
|
mean value: 0.6032696000236323
|
|
|
|
key: train_jcc
|
|
value: [0.8034188 0.80603448 0.81222707 0.8008658 0.79310345 0.79746835
|
|
0.82758621 0.7991453 0.79059829 0.80257511]
|
|
|
|
mean value: 0.8033022867921553
|
|
|
|
MCC on Blind test: 0.38
|
|
|
|
Accuracy on Blind test: 0.7
|
|
|
|
Model_name: Multinomial
|
|
Model func: MultinomialNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MultinomialNB())])
|
|
|
|
key: fit_time
|
|
value: [0.02037907 0.01109266 0.00997972 0.00990462 0.00994921 0.00996852
|
|
0.00993156 0.0100584 0.01054645 0.01049662]
|
|
|
|
mean value: 0.011230683326721192
|
|
|
|
key: score_time
|
|
value: [0.00996089 0.00915313 0.00902486 0.00884271 0.00889897 0.00887513
|
|
0.00885701 0.0089767 0.00954866 0.0093143 ]
|
|
|
|
mean value: 0.009145236015319825
|
|
|
|
key: test_mcc
|
|
value: [0.4105162 0.54684566 0.44324972 0.48454371 0.63259873 0.5262129
|
|
0.43189061 0.47706807 0.54201786 0.36008804]
|
|
|
|
mean value: 0.4855031497137916
|
|
|
|
key: train_mcc
|
|
value: [0.53047441 0.53357737 0.53918167 0.54965675 0.51912852 0.5401765
|
|
0.53276187 0.55541609 0.55003098 0.53609212]
|
|
|
|
mean value: 0.538649627835123
|
|
|
|
key: test_accuracy
|
|
value: [0.70909091 0.78181818 0.72727273 0.74545455 0.81818182 0.76363636
|
|
0.72727273 0.74074074 0.77777778 0.68518519]
|
|
|
|
mean value: 0.7476430976430977
|
|
|
|
key: train_accuracy
|
|
value: [0.7703252 0.77439024 0.77642276 0.7804878 0.76422764 0.77642276
|
|
0.7703252 0.78296146 0.77890467 0.77079108]
|
|
|
|
mean value: 0.7745258826827619
|
|
|
|
key: test_fscore
|
|
value: [0.66666667 0.71428571 0.68085106 0.70833333 0.79166667 0.73469388
|
|
0.65116279 0.70833333 0.72727273 0.63829787]
|
|
|
|
mean value: 0.7021564045977349
|
|
|
|
key: train_fscore
|
|
value: [0.73031026 0.72180451 0.72906404 0.73913043 0.72511848 0.73170732
|
|
0.73411765 0.74340528 0.74352941 0.73781903]
|
|
|
|
mean value: 0.7336006408609944
|
|
|
|
key: test_precision
|
|
value: [0.64 0.78947368 0.66666667 0.68 0.76 0.69230769
|
|
0.7 0.68 0.76190476 0.625 ]
|
|
|
|
mean value: 0.6995352805089647
|
|
|
|
key: train_precision
|
|
value: [0.72169811 0.75 0.74371859 0.73913043 0.71162791 0.73891626
|
|
0.71559633 0.73809524 0.72477064 0.70982143]
|
|
|
|
mean value: 0.7293374943233091
|
|
|
|
key: test_recall
|
|
value: [0.69565217 0.65217391 0.69565217 0.73913043 0.82608696 0.7826087
|
|
0.60869565 0.73913043 0.69565217 0.65217391]
|
|
|
|
mean value: 0.7086956521739131
|
|
|
|
key: train_recall
|
|
value: [0.73913043 0.69565217 0.71497585 0.73913043 0.73913043 0.72463768
|
|
0.75362319 0.74879227 0.76328502 0.76811594]
|
|
|
|
mean value: 0.7386473429951691
|
|
|
|
key: test_roc_auc
|
|
value: [0.70720109 0.76358696 0.72282609 0.74456522 0.81929348 0.76630435
|
|
0.71059783 0.74053296 0.76718093 0.68092567]
|
|
|
|
mean value: 0.7423014551192146
|
|
|
|
key: train_roc_auc
|
|
value: [0.76605645 0.76361556 0.76801424 0.77482838 0.76079329 0.76933638
|
|
0.76803966 0.77824229 0.77674741 0.77042161]
|
|
|
|
mean value: 0.7696095259939654
|
|
|
|
key: test_jcc
|
|
value: [0.5 0.55555556 0.51612903 0.5483871 0.65517241 0.58064516
|
|
0.48275862 0.5483871 0.57142857 0.46875 ]
|
|
|
|
mean value: 0.542721354856366
|
|
|
|
key: train_jcc
|
|
value: [0.57518797 0.56470588 0.57364341 0.5862069 0.56877323 0.57692308
|
|
0.57992565 0.59160305 0.5917603 0.58455882]
|
|
|
|
mean value: 0.5793288297953626
|
|
|
|
MCC on Blind test: 0.39
|
|
|
|
Accuracy on Blind test: 0.7
|
|
|
|
Model_name: Passive Aggresive
|
|
Model func: PassiveAggressiveClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
PassiveAggressiveClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01503873 0.01986313 0.02682614 0.02075624 0.02262497 0.02537298
|
|
0.01910257 0.02685332 0.02298331 0.02089858]
|
|
|
|
mean value: 0.022031998634338378
|
|
|
|
key: score_time
|
|
value: [0.01068616 0.01125503 0.01192212 0.01192665 0.01190472 0.01192307
|
|
0.01186848 0.01198268 0.01190519 0.0118885 ]
|
|
|
|
mean value: 0.0117262601852417
|
|
|
|
key: test_mcc
|
|
value: [0.54964723 0.49540572 0.51276506 0.43420774 0.73839363 0.61131498
|
|
0.36490022 0.64953583 0.61883928 0.60137424]
|
|
|
|
mean value: 0.5576383940609018
|
|
|
|
key: train_mcc
|
|
value: [0.68569252 0.50562368 0.69831943 0.61884838 0.67762357 0.75564617
|
|
0.64996995 0.76602498 0.70329096 0.65621393]
|
|
|
|
mean value: 0.671725355999401
|
|
|
|
key: test_accuracy
|
|
value: [0.78181818 0.67272727 0.70909091 0.67272727 0.87272727 0.8
|
|
0.69090909 0.81481481 0.81481481 0.7962963 ]
|
|
|
|
mean value: 0.7625925925925926
|
|
|
|
key: train_accuracy
|
|
value: [0.84756098 0.68699187 0.83536585 0.7703252 0.83943089 0.87398374
|
|
0.82723577 0.88235294 0.85598377 0.8296146 ]
|
|
|
|
mean value: 0.8248845627401508
|
|
|
|
key: test_fscore
|
|
value: [0.7 0.71875 0.73333333 0.7 0.8372093 0.78431373
|
|
0.48484848 0.80769231 0.77272727 0.7027027 ]
|
|
|
|
mean value: 0.7241577129119878
|
|
|
|
key: train_fscore
|
|
value: [0.80818414 0.72695035 0.83018868 0.78393881 0.77994429 0.86222222
|
|
0.76454294 0.86757991 0.82555283 0.76536313]
|
|
|
|
mean value: 0.8014467302533417
|
|
|
|
key: test_precision
|
|
value: [0.82352941 0.56097561 0.59459459 0.56756757 0.9 0.71428571
|
|
0.8 0.72413793 0.80952381 0.92857143]
|
|
|
|
mean value: 0.7423186067098401
|
|
|
|
key: train_precision
|
|
value: [0.85869565 0.57422969 0.73333333 0.64873418 0.92105263 0.79835391
|
|
0.8961039 0.82251082 0.84 0.90728477]
|
|
|
|
mean value: 0.8000298882469794
|
|
|
|
key: test_recall
|
|
value: [0.60869565 1. 0.95652174 0.91304348 0.7826087 0.86956522
|
|
0.34782609 0.91304348 0.73913043 0.56521739]
|
|
|
|
mean value: 0.7695652173913043
|
|
|
|
key: train_recall
|
|
value: [0.76328502 0.99033816 0.95652174 0.99033816 0.6763285 0.93719807
|
|
0.66666667 0.9178744 0.8115942 0.66183575]
|
|
|
|
mean value: 0.8371980676328502
|
|
|
|
key: test_roc_auc
|
|
value: [0.75747283 0.71875 0.74388587 0.70652174 0.86005435 0.80978261
|
|
0.64266304 0.82748948 0.80504909 0.76647966]
|
|
|
|
mean value: 0.7638148667601683
|
|
|
|
key: train_roc_auc
|
|
value: [0.83602848 0.72850242 0.85194508 0.80043224 0.81711162 0.88263412
|
|
0.80526316 0.88725888 0.84985305 0.80644235]
|
|
|
|
mean value: 0.826547138343477
|
|
|
|
key: test_jcc
|
|
value: [0.53846154 0.56097561 0.57894737 0.53846154 0.72 0.64516129
|
|
0.32 0.67741935 0.62962963 0.54166667]
|
|
|
|
mean value: 0.5750722996557813
|
|
|
|
key: train_jcc
|
|
value: [0.67811159 0.57103064 0.70967742 0.64465409 0.63926941 0.7578125
|
|
0.61883408 0.76612903 0.70292887 0.6199095 ]
|
|
|
|
mean value: 0.6708357127980087
|
|
|
|
MCC on Blind test: 0.37
|
|
|
|
Accuracy on Blind test: 0.6
|
|
|
|
Model_name: Stochastic GDescent
|
|
Model func: SGDClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SGDClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02388692 0.02631426 0.02282667 0.0212965 0.02375722 0.02388358
|
|
0.02595091 0.02372169 0.02637124 0.02537918]
|
|
|
|
mean value: 0.024338817596435545
|
|
|
|
key: score_time
|
|
value: [0.01199365 0.01192498 0.01190543 0.01196766 0.01190972 0.01190233
|
|
0.01212358 0.01190972 0.01263452 0.01318789]
|
|
|
|
mean value: 0.01214594841003418
|
|
|
|
key: test_mcc
|
|
value: [0.46046933 0.42210145 0.47166751 0.62311394 0.48270989 0.22282609
|
|
0.38208785 0.63798041 0.61030357 0.50530306]
|
|
|
|
mean value: 0.4818563094696725
|
|
|
|
key: train_mcc
|
|
value: [0.66191422 0.55427156 0.5437853 0.48759994 0.59774701 0.40770208
|
|
0.68941608 0.76921545 0.6461682 0.72594029]
|
|
|
|
mean value: 0.6083760151782537
|
|
|
|
key: test_accuracy
|
|
value: [0.72727273 0.70909091 0.65454545 0.8 0.74545455 0.63636364
|
|
0.69090909 0.7962963 0.77777778 0.75925926]
|
|
|
|
mean value: 0.7296969696969697
|
|
|
|
key: train_accuracy
|
|
value: [0.83130081 0.76626016 0.71544715 0.73170732 0.79065041 0.68699187
|
|
0.84552846 0.87626775 0.79513185 0.86409736]
|
|
|
|
mean value: 0.7903383136265439
|
|
|
|
key: test_fscore
|
|
value: [0.54545455 0.5 0.70769231 0.68571429 0.61111111 0.375
|
|
0.4516129 0.8 0.78571429 0.71111111]
|
|
|
|
mean value: 0.6173410550023453
|
|
|
|
key: train_fscore
|
|
value: [0.76619718 0.62295082 0.74545455 0.54166667 0.67711599 0.40769231
|
|
0.79005525 0.86825054 0.8 0.84454756]
|
|
|
|
mean value: 0.706393086242575
|
|
|
|
key: test_precision
|
|
value: [0.9 0.88888889 0.54761905 1. 0.84615385 0.66666667
|
|
0.875 0.6875 0.66666667 0.72727273]
|
|
|
|
mean value: 0.7805767843267843
|
|
|
|
key: train_precision
|
|
value: [0.91891892 0.96938776 0.59766764 0.96296296 0.96428571 1.
|
|
0.92258065 0.78515625 0.67785235 0.8125 ]
|
|
|
|
mean value: 0.861131223390818
|
|
|
|
key: test_recall
|
|
value: [0.39130435 0.34782609 1. 0.52173913 0.47826087 0.26086957
|
|
0.30434783 0.95652174 0.95652174 0.69565217]
|
|
|
|
mean value: 0.591304347826087
|
|
|
|
key: train_recall
|
|
value: [0.65700483 0.4589372 0.99033816 0.37681159 0.52173913 0.25603865
|
|
0.69082126 0.97101449 0.97584541 0.87922705]
|
|
|
|
mean value: 0.6777777777777778
|
|
|
|
key: test_roc_auc
|
|
value: [0.68002717 0.65828804 0.703125 0.76086957 0.70788043 0.58355978
|
|
0.63654891 0.81697055 0.80084151 0.75105189]
|
|
|
|
mean value: 0.7099162868162693
|
|
|
|
key: train_roc_auc
|
|
value: [0.80744978 0.72420544 0.75306382 0.68314264 0.75385202 0.62801932
|
|
0.824358 0.8893534 0.82009054 0.86618695]
|
|
|
|
mean value: 0.774972191551139
|
|
|
|
key: test_jcc
|
|
value: [0.375 0.33333333 0.54761905 0.52173913 0.44 0.23076923
|
|
0.29166667 0.66666667 0.64705882 0.55172414]
|
|
|
|
mean value: 0.4605577036950174
|
|
|
|
key: train_jcc
|
|
value: [0.62100457 0.45238095 0.5942029 0.37142857 0.51184834 0.25603865
|
|
0.65296804 0.76717557 0.66666667 0.73092369]
|
|
|
|
mean value: 0.5624637947640064
|
|
|
|
MCC on Blind test: 0.45
|
|
|
|
Accuracy on Blind test: 0.69
|
|
|
|
Model_name: AdaBoost Classifier
|
|
Model func: AdaBoostClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', AdaBoostClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.19745541 0.18153834 0.18095517 0.18258262 0.18196273 0.18145657
|
|
0.18402338 0.20013261 0.21202493 0.21415091]
|
|
|
|
mean value: 0.19162826538085936
|
|
|
|
key: score_time
|
|
value: [0.01559019 0.0153532 0.01577616 0.01554918 0.01544118 0.01554728
|
|
0.01582265 0.01741314 0.01674986 0.01804852]
|
|
|
|
mean value: 0.016129136085510254
|
|
|
|
key: test_mcc
|
|
value: [0.70187922 0.82153646 0.78065376 0.64214885 0.78961518 0.74770557
|
|
0.66559476 0.6970547 0.72464276 0.82092207]
|
|
|
|
mean value: 0.7391753321463528
|
|
|
|
key: train_mcc
|
|
value: [0.94597304 0.93750179 0.95410967 0.95022532 0.96671882 0.94996186
|
|
0.95022532 0.95845152 0.94591604 0.9460534 ]
|
|
|
|
mean value: 0.9505136775538949
|
|
|
|
key: test_accuracy
|
|
value: [0.85454545 0.90909091 0.89090909 0.81818182 0.89090909 0.87272727
|
|
0.83636364 0.85185185 0.85185185 0.90740741]
|
|
|
|
mean value: 0.8683838383838384
|
|
|
|
key: train_accuracy
|
|
value: [0.97357724 0.9695122 0.97764228 0.97560976 0.98373984 0.97560976
|
|
0.97560976 0.97971602 0.97363083 0.97363083]
|
|
|
|
mean value: 0.9758278500634905
|
|
|
|
key: test_fscore
|
|
value: [0.80952381 0.89795918 0.875 0.8 0.88 0.85714286
|
|
0.7804878 0.82608696 0.84615385 0.87804878]
|
|
|
|
mean value: 0.8450403238381575
|
|
|
|
key: train_fscore
|
|
value: [0.96882494 0.96385542 0.97336562 0.97129187 0.98076923 0.97101449
|
|
0.97129187 0.97596154 0.9686747 0.96882494]
|
|
|
|
mean value: 0.9713874612053074
|
|
|
|
key: test_precision
|
|
value: [0.89473684 0.84615385 0.84 0.74074074 0.81481481 0.80769231
|
|
0.88888889 0.82608696 0.75862069 1. ]
|
|
|
|
mean value: 0.8417735086572773
|
|
|
|
key: train_precision
|
|
value: [0.96190476 0.96153846 0.97572816 0.96208531 0.97607656 0.97101449
|
|
0.96208531 0.97129187 0.96634615 0.96190476]
|
|
|
|
mean value: 0.9669975824453944
|
|
|
|
key: test_recall
|
|
value: [0.73913043 0.95652174 0.91304348 0.86956522 0.95652174 0.91304348
|
|
0.69565217 0.82608696 0.95652174 0.7826087 ]
|
|
|
|
mean value: 0.8608695652173913
|
|
|
|
key: train_recall
|
|
value: [0.97584541 0.96618357 0.97101449 0.98067633 0.98550725 0.97101449
|
|
0.98067633 0.98067633 0.97101449 0.97584541]
|
|
|
|
mean value: 0.9758454106280193
|
|
|
|
key: test_roc_auc
|
|
value: [0.83831522 0.91576087 0.89402174 0.82540761 0.90013587 0.87839674
|
|
0.81657609 0.84852735 0.86535764 0.89130435]
|
|
|
|
mean value: 0.8673803471248247
|
|
|
|
key: train_roc_auc
|
|
value: [0.97388762 0.9690567 0.97673532 0.97630308 0.98398169 0.97498093
|
|
0.97630308 0.97984865 0.97326948 0.97393669]
|
|
|
|
mean value: 0.975830324011102
|
|
|
|
key: test_jcc
|
|
value: [0.68 0.81481481 0.77777778 0.66666667 0.78571429 0.75
|
|
0.64 0.7037037 0.73333333 0.7826087 ]
|
|
|
|
mean value: 0.7334619277662756
|
|
|
|
key: train_jcc
|
|
value: [0.93953488 0.93023256 0.94811321 0.94418605 0.96226415 0.94366197
|
|
0.94418605 0.95305164 0.93925234 0.93953488]
|
|
|
|
mean value: 0.9444017728567289
|
|
|
|
MCC on Blind test: 0.67
|
|
|
|
Accuracy on Blind test: 0.84
|
|
|
|
Model_name: Bagging Classifier
|
|
Model func: BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
BaggingClassifier(n_jobs=10, oob_score=True,
|
|
random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.07939768 0.05831194 0.07030249 0.07544374 0.10521579 0.0780127
|
|
0.09002948 0.08250356 0.08563209 0.09377265]
|
|
|
|
mean value: 0.08186221122741699
|
|
|
|
key: score_time
|
|
value: [0.02362895 0.01924038 0.02084756 0.02889061 0.01976109 0.01948786
|
|
0.03774977 0.02285075 0.03961301 0.02583504]
|
|
|
|
mean value: 0.02579050064086914
|
|
|
|
key: test_mcc
|
|
value: [0.66176788 0.92870878 0.74770557 0.74055136 0.70662625 0.70662625
|
|
0.81260451 0.62011507 0.78645618 0.890415 ]
|
|
|
|
mean value: 0.7601576842240911
|
|
|
|
key: train_mcc
|
|
value: [0.97918627 0.97080023 0.98332062 0.96681345 0.98749079 0.98749079
|
|
0.97505096 0.98334516 0.98340136 0.9875091 ]
|
|
|
|
mean value: 0.9804408715224007
|
|
|
|
key: test_accuracy
|
|
value: [0.83636364 0.96363636 0.87272727 0.87272727 0.85454545 0.85454545
|
|
0.90909091 0.81481481 0.88888889 0.94444444]
|
|
|
|
mean value: 0.8811784511784512
|
|
|
|
key: train_accuracy
|
|
value: [0.9898374 0.98577236 0.99186992 0.98373984 0.99390244 0.99390244
|
|
0.98780488 0.99188641 0.99188641 0.99391481]
|
|
|
|
mean value: 0.9904516895067531
|
|
|
|
key: test_fscore
|
|
value: [0.79069767 0.95833333 0.85714286 0.85106383 0.83333333 0.83333333
|
|
0.88888889 0.76190476 0.88 0.93023256]
|
|
|
|
mean value: 0.8584930570281881
|
|
|
|
key: train_fscore
|
|
value: [0.98783455 0.98305085 0.99033816 0.98039216 0.99273608 0.99273608
|
|
0.98536585 0.99033816 0.99038462 0.99273608]
|
|
|
|
mean value: 0.9885912584189805
|
|
|
|
key: test_precision
|
|
value: [0.85 0.92 0.80769231 0.83333333 0.8 0.8
|
|
0.90909091 0.84210526 0.81481481 1. ]
|
|
|
|
mean value: 0.857703662808926
|
|
|
|
key: train_precision
|
|
value: [0.99509804 0.98543689 0.99033816 0.99502488 0.99514563 0.99514563
|
|
0.99507389 0.99033816 0.98564593 0.99514563]
|
|
|
|
mean value: 0.9922392854387729
|
|
|
|
key: test_recall
|
|
value: [0.73913043 1. 0.91304348 0.86956522 0.86956522 0.86956522
|
|
0.86956522 0.69565217 0.95652174 0.86956522]
|
|
|
|
mean value: 0.8652173913043478
|
|
|
|
key: train_recall
|
|
value: [0.98067633 0.98067633 0.99033816 0.96618357 0.99033816 0.99033816
|
|
0.97584541 0.99033816 0.99516908 0.99033816]
|
|
|
|
mean value: 0.985024154589372
|
|
|
|
key: test_roc_auc
|
|
value: [0.82269022 0.96875 0.87839674 0.87228261 0.85665761 0.85665761
|
|
0.90353261 0.79943899 0.89761571 0.93478261]
|
|
|
|
mean value: 0.8790804698457223
|
|
|
|
key: train_roc_auc
|
|
value: [0.98858378 0.98507501 0.99166031 0.9813374 0.9934147 0.9934147
|
|
0.98616832 0.99167258 0.99233979 0.99342083]
|
|
|
|
mean value: 0.9897087402808227
|
|
|
|
key: test_jcc
|
|
value: [0.65384615 0.92 0.75 0.74074074 0.71428571 0.71428571
|
|
0.8 0.61538462 0.78571429 0.86956522]
|
|
|
|
mean value: 0.7563822441648529
|
|
|
|
key: train_jcc
|
|
value: [0.97596154 0.96666667 0.98086124 0.96153846 0.98557692 0.98557692
|
|
0.97115385 0.98086124 0.98095238 0.98557692]
|
|
|
|
mean value: 0.977472615104194
|
|
|
|
MCC on Blind test: 0.7
|
|
|
|
Accuracy on Blind test: 0.85
|
|
|
|
Model_name: Gaussian Process
|
|
Model func: GaussianProcessClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianProcessClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.16095352 0.21311998 0.11064672 0.18200755 0.18301582 0.18330669
|
|
0.18382049 0.18628192 0.18736124 0.20192957]
|
|
|
|
mean value: 0.17924435138702394
|
|
|
|
key: score_time
|
|
value: [0.01886606 0.02653193 0.0213213 0.0254035 0.02533579 0.02538133
|
|
0.02534842 0.02519035 0.02613616 0.02551413]
|
|
|
|
mean value: 0.024502897262573244
|
|
|
|
key: test_mcc
|
|
value: [0.39011901 0.51163988 0.24416604 0.44324972 0.43189061 0.4105162
|
|
0.43540317 0.10592543 0.30642689 0.38376294]
|
|
|
|
mean value: 0.3663099878731091
|
|
|
|
key: train_mcc
|
|
value: [0.96671882 0.94992874 0.96671882 0.9708388 0.94996186 0.95410967
|
|
0.96245495 0.96255992 0.95000174 0.96255992]
|
|
|
|
mean value: 0.9595853232398288
|
|
|
|
key: test_accuracy
|
|
value: [0.70909091 0.76363636 0.63636364 0.72727273 0.72727273 0.70909091
|
|
0.72727273 0.57407407 0.66666667 0.7037037 ]
|
|
|
|
mean value: 0.6944444444444444
|
|
|
|
key: train_accuracy
|
|
value: [0.98373984 0.97560976 0.98373984 0.98577236 0.97560976 0.97764228
|
|
0.98170732 0.98174442 0.97565923 0.98174442]
|
|
|
|
mean value: 0.9802969211233694
|
|
|
|
key: test_fscore
|
|
value: [0.6 0.71111111 0.54545455 0.68085106 0.65116279 0.66666667
|
|
0.59459459 0.43902439 0.57142857 0.6 ]
|
|
|
|
mean value: 0.6060293734026854
|
|
|
|
key: train_fscore
|
|
value: [0.98076923 0.97087379 0.98076923 0.98313253 0.97101449 0.97336562
|
|
0.97820823 0.97831325 0.97087379 0.97831325]
|
|
|
|
mean value: 0.9765633413131132
|
|
|
|
key: test_precision
|
|
value: [0.70588235 0.72727273 0.57142857 0.66666667 0.7 0.64
|
|
0.78571429 0.5 0.63157895 0.70588235]
|
|
|
|
mean value: 0.6634425904333026
|
|
|
|
key: train_precision
|
|
value: [0.97607656 0.97560976 0.97607656 0.98076923 0.97101449 0.97572816
|
|
0.98058252 0.97596154 0.97560976 0.97596154]
|
|
|
|
mean value: 0.976339010230055
|
|
|
|
key: test_recall
|
|
value: [0.52173913 0.69565217 0.52173913 0.69565217 0.60869565 0.69565217
|
|
0.47826087 0.39130435 0.52173913 0.52173913]
|
|
|
|
mean value: 0.5652173913043478
|
|
|
|
key: train_recall
|
|
value: [0.98550725 0.96618357 0.98550725 0.98550725 0.97101449 0.97101449
|
|
0.97584541 0.98067633 0.96618357 0.98067633]
|
|
|
|
mean value: 0.9768115942028985
|
|
|
|
key: test_roc_auc
|
|
value: [0.68274457 0.75407609 0.62024457 0.72282609 0.71059783 0.70720109
|
|
0.69225543 0.55049088 0.64796634 0.6802244 ]
|
|
|
|
mean value: 0.6768627279102384
|
|
|
|
key: train_roc_auc
|
|
value: [0.98398169 0.97431986 0.98398169 0.98573608 0.97498093 0.97673532
|
|
0.98090516 0.98159691 0.97435053 0.98159691]
|
|
|
|
mean value: 0.9798185071983698
|
|
|
|
key: test_jcc
|
|
value: [0.42857143 0.55172414 0.375 0.51612903 0.48275862 0.5
|
|
0.42307692 0.28125 0.4 0.42857143]
|
|
|
|
mean value: 0.43870815710985345
|
|
|
|
key: train_jcc
|
|
value: [0.96226415 0.94339623 0.96226415 0.96682464 0.94366197 0.94811321
|
|
0.95734597 0.95754717 0.94339623 0.95754717]
|
|
|
|
mean value: 0.9542360889831523
|
|
|
|
MCC on Blind test: 0.32
|
|
|
|
Accuracy on Blind test: 0.67
|
|
|
|
Model_name: Gradient Boosting
|
|
Model func: GradientBoostingClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GradientBoostingClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.75554276 0.74635649 0.74191046 0.74311304 0.73715711 0.75417137
|
|
0.74482107 0.73726082 0.75659513 0.74458671]
|
|
|
|
mean value: 0.7461514949798584
|
|
|
|
key: score_time
|
|
value: [0.00954223 0.0093863 0.00966144 0.00947142 0.00959969 0.01022458
|
|
0.00977397 0.00938845 0.00977755 0.00960946]
|
|
|
|
mean value: 0.00964350700378418
|
|
|
|
key: test_mcc
|
|
value: [0.66559476 0.85468127 0.82153646 0.78961518 0.74770557 0.82153646
|
|
0.77526165 0.5802059 0.68012012 0.890415 ]
|
|
|
|
mean value: 0.762667238203967
|
|
|
|
key: train_mcc
|
|
value: [0.99584156 0.99166031 0.99583607 0.99583607 0.99166031 1.
|
|
0.99168385 1. 1. 1. ]
|
|
|
|
mean value: 0.9962518155398403
|
|
|
|
key: test_accuracy
|
|
value: [0.83636364 0.92727273 0.90909091 0.89090909 0.87272727 0.90909091
|
|
0.89090909 0.7962963 0.83333333 0.94444444]
|
|
|
|
mean value: 0.8810437710437711
|
|
|
|
key: train_accuracy
|
|
value: [0.99796748 0.99593496 0.99796748 0.99796748 0.99593496 1.
|
|
0.99593496 1. 1. 1. ]
|
|
|
|
mean value: 0.9981707317073171
|
|
|
|
key: test_fscore
|
|
value: [0.7804878 0.91666667 0.89795918 0.88 0.85714286 0.89795918
|
|
0.86363636 0.74418605 0.82352941 0.93023256]
|
|
|
|
mean value: 0.8591800076086744
|
|
|
|
key: train_fscore
|
|
value: [0.99759036 0.99516908 0.99757869 0.99757869 0.99516908 1.
|
|
0.99514563 1. 1. 1. ]
|
|
|
|
mean value: 0.9978231541752846
|
|
|
|
key: test_precision
|
|
value: [0.88888889 0.88 0.84615385 0.81481481 0.80769231 0.84615385
|
|
0.9047619 0.8 0.75 1. ]
|
|
|
|
mean value: 0.8538465608465609
|
|
|
|
key: train_precision
|
|
value: [0.99519231 0.99516908 1. 1. 0.99516908 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9985530471943516
|
|
|
|
key: test_recall
|
|
value: [0.69565217 0.95652174 0.95652174 0.95652174 0.91304348 0.95652174
|
|
0.82608696 0.69565217 0.91304348 0.86956522]
|
|
|
|
mean value: 0.8739130434782608
|
|
|
|
key: train_recall
|
|
value: [1. 0.99516908 0.99516908 0.99516908 0.99516908 1.
|
|
0.99033816 1. 1. 1. ]
|
|
|
|
mean value: 0.9971014492753623
|
|
|
|
key: test_roc_auc
|
|
value: [0.81657609 0.93138587 0.91576087 0.90013587 0.87839674 0.91576087
|
|
0.88179348 0.78330996 0.84361851 0.93478261]
|
|
|
|
mean value: 0.8801520862552594
|
|
|
|
key: train_roc_auc
|
|
value: [0.99824561 0.99583016 0.99758454 0.99758454 0.99583016 1.
|
|
0.99516908 1. 1. 1. ]
|
|
|
|
mean value: 0.9980244088482074
|
|
|
|
key: test_jcc
|
|
value: [0.64 0.84615385 0.81481481 0.78571429 0.75 0.81481481
|
|
0.76 0.59259259 0.7 0.86956522]
|
|
|
|
mean value: 0.7573655571481658
|
|
|
|
key: train_jcc
|
|
value: [0.99519231 0.99038462 0.99516908 0.99516908 0.99038462 1.
|
|
0.99033816 1. 1. 1. ]
|
|
|
|
mean value: 0.9956637866963954
|
|
|
|
MCC on Blind test: 0.73
|
|
|
|
Accuracy on Blind test: 0.87
|
|
|
|
Model_name: QDA
|
|
Model func: QuadraticDiscriminantAnalysis()
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', QuadraticDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.0453124 0.03122139 0.03177643 0.03254819 0.03172731 0.03109336
|
|
0.03182578 0.03118396 0.0318737 0.03185582]
|
|
|
|
mean value: 0.03304183483123779
|
|
|
|
key: score_time
|
|
value: [0.01310349 0.01280475 0.01356483 0.01508045 0.01738858 0.01533246
|
|
0.01550055 0.01521564 0.01526809 0.01532745]
|
|
|
|
mean value: 0.014858627319335937
|
|
|
|
key: test_mcc
|
|
value: [0.13987572 0.07608696 0.00162269 0.26809513 0.32375563 0.34977196
|
|
0.24522056 0.0837414 0.28025234 0.10942918]
|
|
|
|
mean value: 0.18778515831908477
|
|
|
|
key: train_mcc
|
|
value: [0.32061883 0.32955723 0.33251059 0.30850027 0.30543128 0.30543128
|
|
0.32061883 0.32267058 0.30454779 0.31065012]
|
|
|
|
mean value: 0.31605367928568373
|
|
|
|
key: test_accuracy
|
|
value: [0.47272727 0.47272727 0.45454545 0.50909091 0.54545455 0.56363636
|
|
0.52727273 0.48148148 0.57407407 0.48148148]
|
|
|
|
mean value: 0.5082491582491582
|
|
|
|
key: train_accuracy
|
|
value: [0.54471545 0.55081301 0.55284553 0.53658537 0.53455285 0.53455285
|
|
0.54471545 0.54563895 0.53346856 0.53752535]
|
|
|
|
mean value: 0.5415413347845446
|
|
|
|
key: test_fscore
|
|
value: [0.60273973 0.57971014 0.54545455 0.63013699 0.64788732 0.65714286
|
|
0.62857143 0.58823529 0.64615385 0.6 ]
|
|
|
|
mean value: 0.6126032152640289
|
|
|
|
key: train_fscore
|
|
value: [0.64890282 0.6519685 0.65299685 0.64485981 0.64385692 0.64385692
|
|
0.64890282 0.64890282 0.64285714 0.64485981]
|
|
|
|
mean value: 0.6471964423706671
|
|
|
|
key: test_precision
|
|
value: [0.44 0.43478261 0.41860465 0.46 0.47916667 0.4893617
|
|
0.46808511 0.44444444 0.5 0.44680851]
|
|
|
|
mean value: 0.458125369011849
|
|
|
|
key: train_precision
|
|
value: [0.48027842 0.48364486 0.48477752 0.47586207 0.47477064 0.47477064
|
|
0.48027842 0.48027842 0.47368421 0.47586207]
|
|
|
|
mean value: 0.47842072770598526
|
|
|
|
key: test_recall
|
|
value: [0.95652174 0.86956522 0.7826087 1. 1. 1.
|
|
0.95652174 0.86956522 0.91304348 0.91304348]
|
|
|
|
mean value: 0.9260869565217391
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.54076087 0.52853261 0.50067935 0.578125 0.609375 0.625
|
|
0.58763587 0.5315568 0.61781206 0.5371669 ]
|
|
|
|
mean value: 0.5656644460028051
|
|
|
|
key: train_roc_auc
|
|
value: [0.60701754 0.6122807 0.61403509 0.6 0.59824561 0.59824561
|
|
0.60701754 0.60839161 0.5979021 0.6013986 ]
|
|
|
|
mean value: 0.6044534412955466
|
|
|
|
key: test_jcc
|
|
value: [0.43137255 0.40816327 0.375 0.46 0.47916667 0.4893617
|
|
0.45833333 0.41666667 0.47727273 0.42857143]
|
|
|
|
mean value: 0.44239083389642125
|
|
|
|
key: train_jcc
|
|
value: [0.48027842 0.48364486 0.48477752 0.47586207 0.47477064 0.47477064
|
|
0.48027842 0.48027842 0.47368421 0.47586207]
|
|
|
|
mean value: 0.47842072770598526
|
|
|
|
MCC on Blind test: 0.04
|
|
|
|
Accuracy on Blind test: 0.46
|
|
|
|
Model_name: Ridge Classifier
|
|
Model func: RidgeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02550864 0.01593661 0.02783704 0.01576757 0.01569366 0.02469158
|
|
0.01627493 0.01625252 0.04488945 0.03936982]
|
|
|
|
mean value: 0.024222183227539062
|
|
|
|
key: score_time
|
|
value: [0.0141151 0.01227522 0.01901174 0.01219106 0.01244736 0.01307368
|
|
0.01247072 0.01240945 0.0189209 0.01890063]
|
|
|
|
mean value: 0.014581584930419922
|
|
|
|
key: test_mcc
|
|
value: [0.62436244 0.59190054 0.50741958 0.56841568 0.63259873 0.65508136
|
|
0.51203338 0.66901612 0.66155709 0.54503297]
|
|
|
|
mean value: 0.5967417897513163
|
|
|
|
key: train_mcc
|
|
value: [0.75934031 0.76482813 0.78384839 0.74766434 0.74663867 0.76868204
|
|
0.7969552 0.8060331 0.76963028 0.78896998]
|
|
|
|
mean value: 0.7732590448828639
|
|
|
|
key: test_accuracy
|
|
value: [0.81818182 0.8 0.74545455 0.78181818 0.81818182 0.81818182
|
|
0.76363636 0.83333333 0.83333333 0.77777778]
|
|
|
|
mean value: 0.798989898989899
|
|
|
|
key: train_accuracy
|
|
value: [0.88211382 0.88414634 0.89430894 0.87601626 0.87601626 0.88617886
|
|
0.9004065 0.90466531 0.88640974 0.89655172]
|
|
|
|
mean value: 0.8886813766717789
|
|
|
|
key: test_fscore
|
|
value: [0.76190476 0.76595745 0.73076923 0.76 0.79166667 0.80769231
|
|
0.66666667 0.81632653 0.80851064 0.7 ]
|
|
|
|
mean value: 0.7609494249418262
|
|
|
|
key: train_fscore
|
|
value: [0.86190476 0.86588235 0.87559809 0.85579196 0.85441527 0.86792453
|
|
0.88361045 0.88888889 0.8685446 0.87885986]
|
|
|
|
mean value: 0.870142076452663
|
|
|
|
key: test_precision
|
|
value: [0.84210526 0.75 0.65517241 0.7037037 0.76 0.72413793
|
|
0.8125 0.76923077 0.79166667 0.82352941]
|
|
|
|
mean value: 0.7632046159351327
|
|
|
|
key: train_precision
|
|
value: [0.84976526 0.8440367 0.86729858 0.83796296 0.84433962 0.84792627
|
|
0.86915888 0.87037037 0.84474886 0.86448598]
|
|
|
|
mean value: 0.8540093475179242
|
|
|
|
key: test_recall
|
|
value: [0.69565217 0.7826087 0.82608696 0.82608696 0.82608696 0.91304348
|
|
0.56521739 0.86956522 0.82608696 0.60869565]
|
|
|
|
mean value: 0.7739130434782608
|
|
|
|
key: train_recall
|
|
value: [0.87439614 0.88888889 0.88405797 0.87439614 0.8647343 0.88888889
|
|
0.89855072 0.90821256 0.89371981 0.89371981]
|
|
|
|
mean value: 0.8869565217391304
|
|
|
|
key: test_roc_auc
|
|
value: [0.80095109 0.79755435 0.75679348 0.78804348 0.81929348 0.83152174
|
|
0.7357337 0.83800842 0.83239832 0.75596073]
|
|
|
|
mean value: 0.7956258765778401
|
|
|
|
key: train_roc_auc
|
|
value: [0.88105772 0.88479532 0.89290618 0.87579456 0.87447241 0.88654971
|
|
0.90015256 0.90515523 0.88741934 0.8961606 ]
|
|
|
|
mean value: 0.8884463629429304
|
|
|
|
key: test_jcc
|
|
value: [0.61538462 0.62068966 0.57575758 0.61290323 0.65517241 0.67741935
|
|
0.5 0.68965517 0.67857143 0.53846154]
|
|
|
|
mean value: 0.616401498019963
|
|
|
|
key: train_jcc
|
|
value: [0.75732218 0.76348548 0.7787234 0.74793388 0.74583333 0.76666667
|
|
0.79148936 0.8 0.76763485 0.78389831]
|
|
|
|
mean value: 0.7702987463022138
|
|
|
|
MCC on Blind test: 0.43
|
|
|
|
Accuracy on Blind test: 0.73
|
|
|
|
Model_name: Ridge ClassifierCV
|
|
Model func: RidgeClassifierCV(cv=10)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifierCV(cv=10))])
|
|
|
|
key: fit_time
|
|
value: [0.21079087 0.22206306 0.3546133 0.26670313 0.21922231 0.24915075
|
|
0.30519938 0.30639625 0.29098701 0.28081584]
|
|
|
|
mean value: 0.2705941915512085
|
|
|
|
key: score_time
|
|
value: [0.0122683 0.01256919 0.02083921 0.01405358 0.01921558 0.01912498
|
|
0.01912189 0.01893353 0.02347326 0.02534485]
|
|
|
|
mean value: 0.018494439125061036
|
|
|
|
key: test_mcc
|
|
value: [0.62436244 0.59190054 0.50741958 0.56841568 0.63259873 0.65508136
|
|
0.54964723 0.66901612 0.66155709 0.65775818]
|
|
|
|
mean value: 0.6117756954470762
|
|
|
|
key: train_mcc
|
|
value: [0.75934031 0.76482813 0.78384839 0.74766434 0.74663867 0.76868204
|
|
0.81356476 0.8060331 0.76963028 0.79247793]
|
|
|
|
mean value: 0.7752707961049518
|
|
|
|
key: test_accuracy
|
|
value: [0.81818182 0.8 0.74545455 0.78181818 0.81818182 0.81818182
|
|
0.78181818 0.83333333 0.83333333 0.83333333]
|
|
|
|
mean value: 0.8063636363636364
|
|
|
|
key: train_accuracy
|
|
value: [0.88211382 0.88414634 0.89430894 0.87601626 0.87601626 0.88617886
|
|
0.90853659 0.90466531 0.88640974 0.89858012]
|
|
|
|
mean value: 0.8896972245584525
|
|
|
|
key: test_fscore
|
|
value: [0.76190476 0.76595745 0.73076923 0.76 0.79166667 0.80769231
|
|
0.7 0.81632653 0.80851064 0.79069767]
|
|
|
|
mean value: 0.77335252571702
|
|
|
|
key: train_fscore
|
|
value: /home/tanu/git/LSHTM_analysis/scripts/ml/./katg_cd_7030.py:115: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
baseline_CT.sort_values(by = ['test_mcc'], ascending = False, inplace = True)
|
|
/home/tanu/git/LSHTM_analysis/scripts/ml/./katg_cd_7030.py:118: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
baseline_BT.sort_values(by = ['bts_mcc'], ascending = False, inplace = True)
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[0.86190476 0.86588235 0.87559809 0.85579196 0.85441527 0.86792453
|
|
0.89311164 0.88888889 0.8685446 0.88038278]
|
|
|
|
mean value: 0.8712444869812518
|
|
|
|
key: test_precision
|
|
value: [0.84210526 0.75 0.65517241 0.7037037 0.76 0.72413793
|
|
0.82352941 0.76923077 0.79166667 0.85 ]
|
|
|
|
mean value: 0.7669546159351326
|
|
|
|
key: train_precision
|
|
value: [0.84976526 0.8440367 0.86729858 0.83796296 0.84433962 0.84792627
|
|
0.87850467 0.87037037 0.84474886 0.87203791]
|
|
|
|
mean value: 0.8556991202955297
|
|
|
|
key: test_recall
|
|
value: [0.69565217 0.7826087 0.82608696 0.82608696 0.82608696 0.91304348
|
|
0.60869565 0.86956522 0.82608696 0.73913043]
|
|
|
|
mean value: 0.7913043478260869
|
|
|
|
key: train_recall
|
|
value: [0.87439614 0.88888889 0.88405797 0.87439614 0.8647343 0.88888889
|
|
0.90821256 0.90821256 0.89371981 0.88888889]
|
|
|
|
mean value: 0.8874396135265701
|
|
|
|
key: test_roc_auc
|
|
value: [0.80095109 0.79755435 0.75679348 0.78804348 0.81929348 0.83152174
|
|
0.75747283 0.83800842 0.83239832 0.82117812]
|
|
|
|
mean value: 0.8043215287517531
|
|
|
|
key: train_roc_auc
|
|
value: [0.88105772 0.88479532 0.89290618 0.87579456 0.87447241 0.88654971
|
|
0.90849225 0.90515523 0.88741934 0.89724165]
|
|
|
|
mean value: 0.889388436379283
|
|
|
|
key: test_jcc
|
|
value: [0.61538462 0.62068966 0.57575758 0.61290323 0.65517241 0.67741935
|
|
0.53846154 0.68965517 0.67857143 0.65384615]
|
|
|
|
mean value: 0.6317861134045784
|
|
|
|
key: train_jcc
|
|
value: [0.75732218 0.76348548 0.7787234 0.74793388 0.74583333 0.76666667
|
|
0.80686695 0.8 0.76763485 0.78632479]
|
|
|
|
mean value: 0.7720791535349751
|
|
|
|
MCC on Blind test: 0.41
|
|
|
|
Accuracy on Blind test: 0.71
|
|
|
|
Model_name: Logistic Regression
|
|
Model func: LogisticRegression(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegression(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.0372808 0.03722477 0.04446268 0.03734994 0.03764439 0.03813338
|
|
0.03796172 0.0386374 0.03517652 0.03762078]
|
|
|
|
mean value: 0.038149237632751465
|
|
|
|
key: score_time
|
|
value: [0.01614666 0.02019405 0.01651931 0.01990747 0.01505542 0.01490974
|
|
0.01517749 0.01496434 0.01227784 0.0151968 ]
|
|
|
|
mean value: 0.016034913063049317
|
|
|
|
key: test_mcc
|
|
value: [0.625 0.62622429 0.53150959 0.8125 0.57258185 0.68245968
|
|
0.61982085 0.41661348 0.61445255 0.56449867]
|
|
|
|
mean value: 0.606566095735174
|
|
|
|
key: train_mcc
|
|
value: [0.72016767 0.71653529 0.72107594 0.73312189 0.73753515 0.7770742
|
|
0.74199614 0.72063201 0.7422532 0.73557441]
|
|
|
|
mean value: 0.7345965893718394
|
|
|
|
key: test_accuracy
|
|
value: [0.8125 0.8125 0.765625 0.90625 0.77777778 0.84126984
|
|
0.80952381 0.6984127 0.79365079 0.77777778]
|
|
|
|
mean value: 0.7995287698412699
|
|
|
|
key: train_accuracy
|
|
value: [0.85964912 0.85789474 0.85964912 0.86491228 0.86865149 0.88791594
|
|
0.8704028 0.85989492 0.8704028 0.86690018]
|
|
|
|
mean value: 0.8666273389252466
|
|
|
|
key: test_fscore
|
|
value: [0.8125 0.81818182 0.76190476 0.90625 0.80555556 0.84375
|
|
0.81818182 0.73239437 0.81690141 0.75 ]
|
|
|
|
mean value: 0.8065619728471841
|
|
|
|
key: train_fscore
|
|
value: [0.8630137 0.86106346 0.86440678 0.87102178 0.87001733 0.89078498
|
|
0.87372014 0.86348123 0.87457627 0.87162162]
|
|
|
|
mean value: 0.8703707290626053
|
|
|
|
key: test_precision
|
|
value: [0.8125 0.79411765 0.77419355 0.90625 0.725 0.84375
|
|
0.79411765 0.65 0.725 0.84 ]
|
|
|
|
mean value: 0.7864928842504744
|
|
|
|
key: train_precision
|
|
value: [0.84280936 0.84228188 0.83606557 0.83333333 0.85958904 0.86710963
|
|
0.85049834 0.84333333 0.84868421 0.84313725]
|
|
|
|
mean value: 0.8466841964126378
|
|
|
|
key: test_recall
|
|
value: [0.8125 0.84375 0.75 0.90625 0.90625 0.84375
|
|
0.84375 0.83870968 0.93548387 0.67741935]
|
|
|
|
mean value: 0.8357862903225807
|
|
|
|
key: train_recall
|
|
value: [0.88421053 0.88070175 0.89473684 0.9122807 0.88070175 0.91578947
|
|
0.89824561 0.88461538 0.9020979 0.9020979 ]
|
|
|
|
mean value: 0.8955477855477856
|
|
|
|
key: test_roc_auc
|
|
value: [0.8125 0.8125 0.765625 0.90625 0.77570565 0.84122984
|
|
0.80897177 0.70060484 0.79586694 0.77620968]
|
|
|
|
mean value: 0.799546370967742
|
|
|
|
key: train_roc_auc
|
|
value: [0.85964912 0.85789474 0.85964912 0.86491228 0.86867256 0.88796467
|
|
0.87045148 0.85985155 0.8703472 0.86683842]
|
|
|
|
mean value: 0.8666231137283769
|
|
|
|
key: test_jcc
|
|
value: [0.68421053 0.69230769 0.61538462 0.82857143 0.6744186 0.72972973
|
|
0.69230769 0.57777778 0.69047619 0.6 ]
|
|
|
|
mean value: 0.6785184257522079
|
|
|
|
key: train_jcc
|
|
value: [0.75903614 0.7560241 0.76119403 0.77151335 0.76993865 0.80307692
|
|
0.77575758 0.75975976 0.77710843 0.77245509]
|
|
|
|
mean value: 0.7705864056386634
|
|
|
|
MCC on Blind test: 0.47
|
|
|
|
Accuracy on Blind test: 0.74
|
|
|
|
Model_name: Logistic RegressionCV
|
|
Model func: LogisticRegressionCV(random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegressionCV(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.94220519 1.04799438 0.89575052 0.96442008 0.87544394 1.0187161
|
|
0.91170669 1.0358994 0.90370107 0.8602612 ]
|
|
|
|
mean value: 0.9456098556518555
|
|
|
|
key: score_time
|
|
value: [0.02014327 0.01540089 0.01545882 0.01552725 0.01543331 0.01552343
|
|
0.01533651 0.01330423 0.01524282 0.01570821]
|
|
|
|
mean value: 0.015707874298095705
|
|
|
|
key: test_mcc
|
|
value: [0.625 0.7276878 0.62994079 0.72192954 0.55909213 0.68245968
|
|
0.62325024 0.74772995 0.61895161 0.68352185]
|
|
|
|
mean value: 0.6619563581137236
|
|
|
|
key: train_mcc
|
|
value: [0.85618779 0.85315692 0.88486986 0.89473684 0.89527754 0.81249452
|
|
0.84607646 0.89868933 0.90912887 0.88532216]
|
|
|
|
mean value: 0.8735940298646814
|
|
|
|
key: test_accuracy
|
|
value: [0.8125 0.859375 0.8125 0.859375 0.77777778 0.84126984
|
|
0.80952381 0.87301587 0.80952381 0.84126984]
|
|
|
|
mean value: 0.8296130952380952
|
|
|
|
key: train_accuracy
|
|
value: [0.92807018 0.92631579 0.94210526 0.94736842 0.9474606 0.90542907
|
|
0.92294221 0.94921191 0.95446585 0.94220665]
|
|
|
|
mean value: 0.9365575936338218
|
|
|
|
key: test_fscore
|
|
value: [0.8125 0.86956522 0.8 0.85245902 0.79411765 0.84375
|
|
0.82352941 0.875 0.80645161 0.83333333]
|
|
|
|
mean value: 0.8310706238844835
|
|
|
|
key: train_fscore
|
|
value: [0.92844677 0.92758621 0.94320138 0.94736842 0.94809689 0.90816327
|
|
0.92361111 0.94991364 0.9550173 0.94358974]
|
|
|
|
mean value: 0.9374994727336559
|
|
|
|
key: test_precision
|
|
value: [0.8125 0.81081081 0.85714286 0.89655172 0.75 0.84375
|
|
0.77777778 0.84848485 0.80645161 0.86206897]
|
|
|
|
mean value: 0.8265538596774692
|
|
|
|
key: train_precision
|
|
value: [0.92361111 0.91186441 0.92567568 0.94736842 0.93515358 0.88118812
|
|
0.91408935 0.93856655 0.94520548 0.92307692]
|
|
|
|
mean value: 0.9245799619557747
|
|
|
|
key: test_recall
|
|
value: [0.8125 0.9375 0.75 0.8125 0.84375 0.84375
|
|
0.875 0.90322581 0.80645161 0.80645161]
|
|
|
|
mean value: 0.8391129032258065
|
|
|
|
key: train_recall
|
|
value: [0.93333333 0.94385965 0.96140351 0.94736842 0.96140351 0.93684211
|
|
0.93333333 0.96153846 0.96503497 0.96503497]
|
|
|
|
mean value: 0.9509152251257514
|
|
|
|
key: test_roc_auc
|
|
value: [0.8125 0.859375 0.8125 0.859375 0.77671371 0.84122984
|
|
0.80846774 0.8734879 0.80947581 0.84072581]
|
|
|
|
mean value: 0.8293850806451613
|
|
|
|
key: train_roc_auc
|
|
value: [0.92807018 0.92631579 0.94210526 0.94736842 0.94748497 0.90548399
|
|
0.92296037 0.94919028 0.95444731 0.94216661]
|
|
|
|
mean value: 0.9365593178751074
|
|
|
|
key: test_jcc
|
|
value: [0.68421053 0.76923077 0.66666667 0.74285714 0.65853659 0.72972973
|
|
0.7 0.77777778 0.67567568 0.71428571]
|
|
|
|
mean value: 0.7118970587905119
|
|
|
|
key: train_jcc
|
|
value: [0.86644951 0.86495177 0.89250814 0.9 0.90131579 0.8317757
|
|
0.85806452 0.90460526 0.91390728 0.89320388]
|
|
|
|
mean value: 0.8826781861170421
|
|
|
|
MCC on Blind test: 0.49
|
|
|
|
Accuracy on Blind test: 0.75
|
|
|
|
Model_name: Gaussian NB
|
|
Model func: GaussianNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01452422 0.01175213 0.01055312 0.01048565 0.01047111 0.01041317
|
|
0.01043606 0.01045132 0.01038289 0.01034546]
|
|
|
|
mean value: 0.010981512069702149
|
|
|
|
key: score_time
|
|
value: [0.01245642 0.0096271 0.00930309 0.0090971 0.00905991 0.00908732
|
|
0.00912166 0.00913 0.00914192 0.00923848]
|
|
|
|
mean value: 0.009526300430297851
|
|
|
|
key: test_mcc
|
|
value: [0.60848698 0.62622429 0.56694671 0.59404013 0.60087592 0.46010298
|
|
0.47384924 0.31444802 0.43812738 0.50663549]
|
|
|
|
mean value: 0.5189737164571981
|
|
|
|
key: train_mcc
|
|
value: [0.56630842 0.56060684 0.5176281 0.56727781 0.56810211 0.51641866
|
|
0.50779222 0.62720405 0.57097112 0.64020763]
|
|
|
|
mean value: 0.5642516961897099
|
|
|
|
key: test_accuracy
|
|
value: [0.796875 0.8125 0.78125 0.796875 0.79365079 0.71428571
|
|
0.73015873 0.65079365 0.71428571 0.74603175]
|
|
|
|
mean value: 0.7536706349206349
|
|
|
|
key: train_accuracy
|
|
value: [0.78245614 0.77894737 0.75789474 0.78245614 0.78283713 0.74430823
|
|
0.74956217 0.81260946 0.78458844 0.8178634 ]
|
|
|
|
mean value: 0.7793523212584877
|
|
|
|
key: test_fscore
|
|
value: [0.81690141 0.81818182 0.76666667 0.79365079 0.81690141 0.76315789
|
|
0.70175439 0.68571429 0.73529412 0.7037037 ]
|
|
|
|
mean value: 0.7601926483167489
|
|
|
|
key: train_fscore
|
|
value: [0.78983051 0.78929766 0.76767677 0.79194631 0.79194631 0.77945619
|
|
0.72340426 0.82016807 0.79327731 0.82838284]
|
|
|
|
mean value: 0.7875386217571597
|
|
|
|
key: test_precision
|
|
value: [0.74358974 0.79411765 0.82142857 0.80645161 0.74358974 0.65909091
|
|
0.8 0.61538462 0.67567568 0.82608696]
|
|
|
|
mean value: 0.7485415475243047
|
|
|
|
key: train_precision
|
|
value: [0.76393443 0.75399361 0.73786408 0.75884244 0.75884244 0.68435013
|
|
0.80603448 0.78964401 0.76375405 0.784375 ]
|
|
|
|
mean value: 0.7601634675219903
|
|
|
|
key: test_recall
|
|
value: [0.90625 0.84375 0.71875 0.78125 0.90625 0.90625
|
|
0.625 0.77419355 0.80645161 0.61290323]
|
|
|
|
mean value: 0.7881048387096774
|
|
|
|
key: train_recall
|
|
value: [0.81754386 0.82807018 0.8 0.82807018 0.82807018 0.90526316
|
|
0.65614035 0.85314685 0.82517483 0.87762238]
|
|
|
|
mean value: 0.8219101950680898
|
|
|
|
key: test_roc_auc
|
|
value: [0.796875 0.8125 0.78125 0.796875 0.79183468 0.71118952
|
|
0.73185484 0.65272177 0.71572581 0.74395161]
|
|
|
|
mean value: 0.7534778225806451
|
|
|
|
key: train_roc_auc
|
|
value: [0.78245614 0.77894737 0.75789474 0.78245614 0.78291621 0.74458962
|
|
0.74939885 0.81253834 0.78451724 0.81775856]
|
|
|
|
mean value: 0.7793473193473194
|
|
|
|
key: test_jcc
|
|
value: [0.69047619 0.69230769 0.62162162 0.65789474 0.69047619 0.61702128
|
|
0.54054054 0.52173913 0.58139535 0.54285714]
|
|
|
|
mean value: 0.615632987098922
|
|
|
|
key: train_jcc
|
|
value: [0.65266106 0.6519337 0.62295082 0.65555556 0.65555556 0.63861386
|
|
0.56666667 0.6951567 0.65738162 0.70704225]
|
|
|
|
mean value: 0.6503517789195984
|
|
|
|
MCC on Blind test: 0.42
|
|
|
|
Accuracy on Blind test: 0.71
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01082611 0.01066971 0.01071644 0.01078057 0.01081586 0.01075149
|
|
0.01079583 0.01081753 0.01080179 0.01105452]
|
|
|
|
mean value: 0.010802984237670898
|
|
|
|
key: score_time
|
|
value: [0.00914764 0.00914502 0.00916004 0.00921893 0.00907278 0.00916004
|
|
0.00923276 0.00920725 0.00922751 0.0093658 ]
|
|
|
|
mean value: 0.009193778038024902
|
|
|
|
key: test_mcc
|
|
value: [0.5625 0.438357 0.2214702 0.56360186 0.53549564 0.61895161
|
|
0.49493401 0.34405576 0.48255984 0.4307759 ]
|
|
|
|
mean value: 0.46927018296024287
|
|
|
|
key: train_mcc
|
|
value: [0.53512128 0.56101149 0.55927353 0.54447222 0.5466585 0.53152779
|
|
0.56477321 0.53613782 0.54097122 0.54351524]
|
|
|
|
mean value: 0.5463462303428195
|
|
|
|
key: test_accuracy
|
|
value: [0.78125 0.71875 0.609375 0.78125 0.76190476 0.80952381
|
|
0.74603175 0.66666667 0.73015873 0.71428571]
|
|
|
|
mean value: 0.7319196428571428
|
|
|
|
key: train_accuracy
|
|
value: [0.76491228 0.77894737 0.77894737 0.77017544 0.77232925 0.76532399
|
|
0.78108581 0.76707531 0.76882662 0.77057793]
|
|
|
|
mean value: 0.771820137032599
|
|
|
|
key: test_fscore
|
|
value: [0.78125 0.72727273 0.57627119 0.78787879 0.78873239 0.8125
|
|
0.76470588 0.69565217 0.76056338 0.68965517]
|
|
|
|
mean value: 0.7384481704919857
|
|
|
|
key: train_fscore
|
|
value: [0.78032787 0.79 0.78644068 0.78347107 0.78114478 0.77133106
|
|
0.79061977 0.77721943 0.78145695 0.78130217]
|
|
|
|
mean value: 0.7823313780270075
|
|
|
|
key: test_precision
|
|
value: [0.78125 0.70588235 0.62962963 0.76470588 0.71794872 0.8125
|
|
0.72222222 0.63157895 0.675 0.74074074]
|
|
|
|
mean value: 0.7181458493203849
|
|
|
|
key: train_precision
|
|
value: [0.73230769 0.75238095 0.76065574 0.740625 0.75080906 0.75083056
|
|
0.75641026 0.74598071 0.74213836 0.74760383]
|
|
|
|
mean value: 0.7479742171117733
|
|
|
|
key: test_recall
|
|
value: [0.78125 0.75 0.53125 0.8125 0.875 0.8125
|
|
0.8125 0.77419355 0.87096774 0.64516129]
|
|
|
|
mean value: 0.7665322580645161
|
|
|
|
key: train_recall
|
|
value: [0.83508772 0.83157895 0.81403509 0.83157895 0.81403509 0.79298246
|
|
0.82807018 0.81118881 0.82517483 0.81818182]
|
|
|
|
mean value: 0.8201913875598086
|
|
|
|
key: test_roc_auc
|
|
value: [0.78125 0.71875 0.609375 0.78125 0.76008065 0.80947581
|
|
0.74495968 0.66834677 0.73235887 0.71320565]
|
|
|
|
mean value: 0.7319052419354839
|
|
|
|
key: train_roc_auc
|
|
value: [0.76491228 0.77894737 0.77894737 0.77017544 0.77240216 0.76537235
|
|
0.78116795 0.76699791 0.76872776 0.77049442]
|
|
|
|
mean value: 0.7718145012881855
|
|
|
|
key: test_jcc
|
|
value: [0.64102564 0.57142857 0.4047619 0.65 0.65116279 0.68421053
|
|
0.61904762 0.53333333 0.61363636 0.52631579]
|
|
|
|
mean value: 0.5894922539720582
|
|
|
|
key: train_jcc
|
|
value: [0.63978495 0.65289256 0.64804469 0.64402174 0.64088398 0.62777778
|
|
0.65373961 0.63561644 0.64130435 0.64109589]
|
|
|
|
mean value: 0.6425161984547801
|
|
|
|
MCC on Blind test: 0.36
|
|
|
|
Accuracy on Blind test: 0.68
|
|
|
|
Model_name: K-Nearest Neighbors
|
|
Model func: KNeighborsClassifier()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', KNeighborsClassifier())])
|
|
|
|
key: fit_time
|
|
value: [0.00977778 0.01034784 0.00981355 0.01110673 0.01137733 0.01119494
|
|
0.01055646 0.01125455 0.01110768 0.01122093]
|
|
|
|
mean value: 0.01077578067779541
|
|
|
|
key: score_time
|
|
value: [0.01279974 0.01619864 0.01500916 0.01399326 0.01432085 0.01528668
|
|
0.0143702 0.01536059 0.01473594 0.01460624]
|
|
|
|
mean value: 0.014668130874633789
|
|
|
|
key: test_mcc
|
|
value: [0.51639778 0.50097943 0.25819889 0.4375 0.36661779 0.39757328
|
|
0.40025188 0.34405576 0.33021346 0.56710881]
|
|
|
|
mean value: 0.4118897086766542
|
|
|
|
key: train_mcc
|
|
value: [0.64338976 0.61922715 0.63817508 0.64068622 0.66915977 0.66656936
|
|
0.65172079 0.60720621 0.65801299 0.62989052]
|
|
|
|
mean value: 0.6424037857751423
|
|
|
|
key: test_accuracy
|
|
value: [0.75 0.75 0.625 0.71875 0.68253968 0.6984127
|
|
0.6984127 0.66666667 0.65079365 0.77777778]
|
|
|
|
mean value: 0.7018353174603175
|
|
|
|
key: train_accuracy
|
|
value: [0.82105263 0.80877193 0.81754386 0.81929825 0.83187391 0.83187391
|
|
0.82486865 0.80035026 0.82837128 0.81260946]
|
|
|
|
mean value: 0.8196614127262113
|
|
|
|
key: test_fscore
|
|
value: [0.77777778 0.74193548 0.66666667 0.71875 0.70588235 0.71641791
|
|
0.72463768 0.69565217 0.7027027 0.79411765]
|
|
|
|
mean value: 0.7244540396538339
|
|
|
|
key: train_fscore
|
|
value: [0.82653061 0.81556684 0.82608696 0.82630691 0.84158416 0.83892617
|
|
0.83108108 0.81433225 0.83389831 0.82372323]
|
|
|
|
mean value: 0.8278036514265043
|
|
|
|
key: test_precision
|
|
value: [0.7 0.76666667 0.6 0.71875 0.66666667 0.68571429
|
|
0.67567568 0.63157895 0.60465116 0.72972973]
|
|
|
|
mean value: 0.6779433134612143
|
|
|
|
key: train_precision
|
|
value: [0.8019802 0.7875817 0.78913738 0.79545455 0.79439252 0.80385852
|
|
0.80130293 0.76219512 0.80921053 0.7788162 ]
|
|
|
|
mean value: 0.79239296465173
|
|
|
|
key: test_recall
|
|
value: [0.875 0.71875 0.75 0.71875 0.75 0.75
|
|
0.78125 0.77419355 0.83870968 0.87096774]
|
|
|
|
mean value: 0.7827620967741935
|
|
|
|
key: train_recall
|
|
value: [0.85263158 0.84561404 0.86666667 0.85964912 0.89473684 0.87719298
|
|
0.86315789 0.87412587 0.86013986 0.87412587]
|
|
|
|
mean value: 0.8668040731198626
|
|
|
|
key: test_roc_auc
|
|
value: [0.75 0.75 0.625 0.71875 0.68145161 0.69758065
|
|
0.69707661 0.66834677 0.65372984 0.77923387]
|
|
|
|
mean value: 0.702116935483871
|
|
|
|
key: train_roc_auc
|
|
value: [0.82105263 0.80877193 0.81754386 0.81929825 0.83198381 0.83195313
|
|
0.82493559 0.80022083 0.82831554 0.81250153]
|
|
|
|
mean value: 0.8196577107103422
|
|
|
|
key: test_jcc
|
|
value: [0.63636364 0.58974359 0.5 0.56097561 0.54545455 0.55813953
|
|
0.56818182 0.53333333 0.54166667 0.65853659]
|
|
|
|
mean value: 0.5692395319749262
|
|
|
|
key: train_jcc
|
|
value: [0.70434783 0.68857143 0.7037037 0.70402299 0.72649573 0.72254335
|
|
0.71098266 0.68681319 0.71511628 0.70028011]
|
|
|
|
mean value: 0.7062877262852029
|
|
|
|
MCC on Blind test: 0.26
|
|
|
|
Accuracy on Blind test: 0.64
|
|
|
|
Model_name: SVM
|
|
Model func: SVC(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SVC(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.03306842 0.03381634 0.02899456 0.02887368 0.02807379 0.03325653
|
|
0.03162026 0.03322268 0.03296661 0.03240705]
|
|
|
|
mean value: 0.03162999153137207
|
|
|
|
key: score_time
|
|
value: [0.01416421 0.01412463 0.01416826 0.01375866 0.01350045 0.01424146
|
|
0.01417565 0.01405978 0.01455617 0.01422095]
|
|
|
|
mean value: 0.014097023010253906
|
|
|
|
key: test_mcc
|
|
value: [0.65915306 0.59637658 0.59404013 0.69293487 0.54443762 0.65821474
|
|
0.65085805 0.41661348 0.58778119 0.62939541]
|
|
|
|
mean value: 0.6029805129541226
|
|
|
|
key: train_mcc
|
|
value: [0.72279499 0.71035225 0.7082164 0.71033605 0.72568869 0.72529147
|
|
0.71791058 0.72274854 0.72754221 0.73258386]
|
|
|
|
mean value: 0.7203465041191958
|
|
|
|
key: test_accuracy
|
|
value: [0.828125 0.796875 0.796875 0.84375 0.76190476 0.82539683
|
|
0.82539683 0.6984127 0.77777778 0.80952381]
|
|
|
|
mean value: 0.7964037698412698
|
|
|
|
key: train_accuracy
|
|
value: [0.85964912 0.85438596 0.85263158 0.85263158 0.86164623 0.86164623
|
|
0.85814361 0.85989492 0.86164623 0.86339755]
|
|
|
|
mean value: 0.8585673026699849
|
|
|
|
key: test_fscore
|
|
value: [0.8358209 0.80597015 0.79365079 0.85294118 0.79452055 0.84057971
|
|
0.83076923 0.73239437 0.80555556 0.78571429]
|
|
|
|
mean value: 0.8077916711223889
|
|
|
|
key: train_fscore
|
|
value: [0.86622074 0.85908319 0.8590604 0.86092715 0.86677909 0.86632826
|
|
0.86247878 0.86622074 0.86898839 0.87171053]
|
|
|
|
mean value: 0.8647797260273575
|
|
|
|
key: test_precision
|
|
value: [0.8 0.77142857 0.80645161 0.80555556 0.70731707 0.78378378
|
|
0.81818182 0.65 0.70731707 0.88 ]
|
|
|
|
mean value: 0.7730035488194418
|
|
|
|
key: train_precision
|
|
value: [0.82747604 0.83223684 0.82315113 0.81504702 0.83441558 0.83660131
|
|
0.83552632 0.83012821 0.82649842 0.82298137]
|
|
|
|
mean value: 0.8284062229484791
|
|
|
|
key: test_recall
|
|
value: [0.875 0.84375 0.78125 0.90625 0.90625 0.90625
|
|
0.84375 0.83870968 0.93548387 0.70967742]
|
|
|
|
mean value: 0.8546370967741935
|
|
|
|
key: train_recall
|
|
value: [0.90877193 0.8877193 0.89824561 0.9122807 0.90175439 0.89824561
|
|
0.89122807 0.90559441 0.91608392 0.92657343]
|
|
|
|
mean value: 0.9046497362286836
|
|
|
|
key: test_roc_auc
|
|
value: [0.828125 0.796875 0.796875 0.84375 0.75957661 0.82409274
|
|
0.82510081 0.70060484 0.78024194 0.80796371]
|
|
|
|
mean value: 0.7963205645161291
|
|
|
|
key: train_roc_auc
|
|
value: [0.85964912 0.85438596 0.85263158 0.85263158 0.86171635 0.86171022
|
|
0.85820145 0.85981475 0.86155073 0.86328671]
|
|
|
|
mean value: 0.8585578456631089
|
|
|
|
key: test_jcc
|
|
value: [0.71794872 0.675 0.65789474 0.74358974 0.65909091 0.725
|
|
0.71052632 0.57777778 0.6744186 0.64705882]
|
|
|
|
mean value: 0.6788305629219302
|
|
|
|
key: train_jcc
|
|
value: [0.7640118 0.75297619 0.75294118 0.75581395 0.76488095 0.7641791
|
|
0.75820896 0.7640118 0.76832845 0.77259475]
|
|
|
|
mean value: 0.7617947129272045
|
|
|
|
MCC on Blind test: 0.38
|
|
|
|
Accuracy on Blind test: 0.69
|
|
|
|
Model_name: MLP
|
|
Model func: MLPClassifier(max_iter=500, random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MLPClassifier(max_iter=500, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [2.00646472 2.37337565 0.83091569 1.837955 1.98165274 1.99141598
|
|
2.06337285 2.09861183 1.97596526 1.9511888 ]
|
|
|
|
mean value: 1.9110918521881104
|
|
|
|
key: score_time
|
|
value: [0.01247263 0.01252747 0.01249647 0.01088095 0.01669049 0.01359892
|
|
0.01792812 0.01524568 0.01656628 0.01519775]
|
|
|
|
mean value: 0.014360475540161132
|
|
|
|
key: test_mcc
|
|
value: [0.57265629 0.75146915 0.50097943 0.65915306 0.49960192 0.62325024
|
|
0.55611985 0.58770161 0.61445255 0.5253647 ]
|
|
|
|
mean value: 0.5890748799761464
|
|
|
|
key: train_mcc
|
|
value: [0.97558874 0.98606204 0.84694977 0.94395263 0.97898417 0.96195115
|
|
0.95494277 0.97207363 0.965351 0.96862386]
|
|
|
|
mean value: 0.9554479759844114
|
|
|
|
key: test_accuracy
|
|
value: [0.78125 0.875 0.75 0.828125 0.74603175 0.80952381
|
|
0.77777778 0.79365079 0.79365079 0.76190476]
|
|
|
|
mean value: 0.7916914682539682
|
|
|
|
key: train_accuracy
|
|
value: [0.9877193 0.99298246 0.92280702 0.97192982 0.98949212 0.98073555
|
|
0.97723292 0.98598949 0.98248687 0.98423818]
|
|
|
|
mean value: 0.9775613727839739
|
|
|
|
key: test_fscore
|
|
value: [0.8 0.87878788 0.75757576 0.81967213 0.77142857 0.82352941
|
|
0.78787879 0.79365079 0.81690141 0.74576271]
|
|
|
|
mean value: 0.7995187452549147
|
|
|
|
key: train_fscore
|
|
value: [0.98782609 0.99303136 0.92491468 0.97212544 0.98947368 0.98100173
|
|
0.9775475 0.98611111 0.98275862 0.98440208]
|
|
|
|
mean value: 0.9779192275681451
|
|
|
|
key: test_precision
|
|
value: [0.73684211 0.85294118 0.73529412 0.86206897 0.71052632 0.77777778
|
|
0.76470588 0.78125 0.725 0.78571429]
|
|
|
|
mean value: 0.7732120626532525
|
|
|
|
key: train_precision
|
|
value: [0.97931034 0.98615917 0.90033223 0.96539792 0.98947368 0.96598639
|
|
0.96258503 0.97931034 0.96938776 0.97594502]
|
|
|
|
mean value: 0.9673887894060526
|
|
|
|
key: test_recall
|
|
value: [0.875 0.90625 0.78125 0.78125 0.84375 0.875
|
|
0.8125 0.80645161 0.93548387 0.70967742]
|
|
|
|
mean value: 0.8326612903225806
|
|
|
|
key: train_recall
|
|
value: [0.99649123 1. 0.95087719 0.97894737 0.98947368 0.99649123
|
|
0.99298246 0.99300699 0.9965035 0.99300699]
|
|
|
|
mean value: 0.9887780640412219
|
|
|
|
key: test_roc_auc
|
|
value: [0.78125 0.875 0.75 0.828125 0.74445565 0.80846774
|
|
0.77721774 0.79385081 0.79586694 0.76108871]
|
|
|
|
mean value: 0.7915322580645161
|
|
|
|
key: train_roc_auc
|
|
value: [0.9877193 0.99298246 0.92280702 0.97192982 0.98949209 0.9807631
|
|
0.97726046 0.98597718 0.98246227 0.98422279]
|
|
|
|
mean value: 0.9775616488774383
|
|
|
|
key: test_jcc
|
|
value: [0.66666667 0.78378378 0.6097561 0.69444444 0.62790698 0.7
|
|
0.65 0.65789474 0.69047619 0.59459459]
|
|
|
|
mean value: 0.6675523491112947
|
|
|
|
key: train_jcc
|
|
value: [0.97594502 0.98615917 0.86031746 0.94576271 0.97916667 0.96271186
|
|
0.95608108 0.97260274 0.96610169 0.96928328]
|
|
|
|
mean value: 0.9574131682160492
|
|
|
|
MCC on Blind test: 0.48
|
|
|
|
Accuracy on Blind test: 0.74
|
|
|
|
Model_name: Decision Tree
|
|
Model func: DecisionTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', DecisionTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.04414916 0.0394628 0.0351615 0.03433752 0.04042983 0.03405404
|
|
0.03932333 0.03669691 0.03327823 0.04195404]
|
|
|
|
mean value: 0.03788473606109619
|
|
|
|
key: score_time
|
|
value: [0.01111007 0.00946021 0.00911212 0.00907183 0.00917482 0.00916386
|
|
0.00986719 0.00945878 0.00997353 0.00954318]
|
|
|
|
mean value: 0.009593558311462403
|
|
|
|
key: test_mcc
|
|
value: [0.75 0.90669283 0.71910121 0.875 0.65419917 0.58728587
|
|
0.65315611 0.61895161 0.53159579 0.84530217]
|
|
|
|
mean value: 0.7141284759557099
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.875 0.953125 0.859375 0.9375 0.82539683 0.79365079
|
|
0.82539683 0.80952381 0.76190476 0.92063492]
|
|
|
|
mean value: 0.8561507936507936
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.875 0.95238095 0.85714286 0.9375 0.81967213 0.8
|
|
0.8358209 0.80645161 0.7761194 0.92307692]
|
|
|
|
mean value: 0.8583164775158962
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.875 0.96774194 0.87096774 0.9375 0.86206897 0.78787879
|
|
0.8 0.80645161 0.72222222 0.88235294]
|
|
|
|
mean value: 0.8512184207117303
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.875 0.9375 0.84375 0.9375 0.78125 0.8125
|
|
0.875 0.80645161 0.83870968 0.96774194]
|
|
|
|
mean value: 0.8675403225806452
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.875 0.953125 0.859375 0.9375 0.82610887 0.79334677
|
|
0.82459677 0.80947581 0.76310484 0.92137097]
|
|
|
|
mean value: 0.8563004032258065
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.77777778 0.90909091 0.75 0.88235294 0.69444444 0.66666667
|
|
0.71794872 0.67567568 0.63414634 0.85714286]
|
|
|
|
mean value: 0.7565246331386933
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.59
|
|
|
|
Accuracy on Blind test: 0.8
|
|
|
|
Model_name: Extra Trees
|
|
Model func: ExtraTreesClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreesClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.15806508 0.14951444 0.15549088 0.15570831 0.15163684 0.15120387
|
|
0.14514089 0.14617419 0.14691663 0.14852071]
|
|
|
|
mean value: 0.15083718299865723
|
|
|
|
key: score_time
|
|
value: [0.02020621 0.01982594 0.0203526 0.01929188 0.01975894 0.01916385
|
|
0.02001977 0.01960254 0.0192945 0.02043915]
|
|
|
|
mean value: 0.019795536994934082
|
|
|
|
key: test_mcc
|
|
value: [0.75592895 0.71910121 0.59404013 0.69293487 0.55611985 0.5253647
|
|
0.5892604 0.4969666 0.53874599 0.7591889 ]
|
|
|
|
mean value: 0.6227651593905483
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.875 0.859375 0.796875 0.84375 0.77777778 0.76190476
|
|
0.79365079 0.74603175 0.76190476 0.87301587]
|
|
|
|
mean value: 0.8089285714285714
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.88235294 0.85714286 0.8 0.83333333 0.78787879 0.7761194
|
|
0.80597015 0.75757576 0.7826087 0.85714286]
|
|
|
|
mean value: 0.8140124782141044
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.83333333 0.87096774 0.78787879 0.89285714 0.76470588 0.74285714
|
|
0.77142857 0.71428571 0.71052632 0.96 ]
|
|
|
|
mean value: 0.8048840632718591
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.9375 0.84375 0.8125 0.78125 0.8125 0.8125
|
|
0.84375 0.80645161 0.87096774 0.77419355]
|
|
|
|
mean value: 0.8295362903225807
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.875 0.859375 0.796875 0.84375 0.77721774 0.76108871
|
|
0.79284274 0.74697581 0.76360887 0.87147177]
|
|
|
|
mean value: 0.808820564516129
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.78947368 0.75 0.66666667 0.71428571 0.65 0.63414634
|
|
0.675 0.6097561 0.64285714 0.75 ]
|
|
|
|
mean value: 0.6882185647044441
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.41
|
|
|
|
Accuracy on Blind test: 0.71
|
|
|
|
Model_name: Extra Tree
|
|
Model func: ExtraTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01188111 0.01215482 0.01145577 0.01158643 0.01207089 0.01227689
|
|
0.01192045 0.01207066 0.0113132 0.01151323]
|
|
|
|
mean value: 0.011824345588684082
|
|
|
|
key: score_time
|
|
value: [0.00942564 0.00984573 0.00936389 0.00920653 0.0097208 0.00978017
|
|
0.00969028 0.00946641 0.00935674 0.00909328]
|
|
|
|
mean value: 0.009494948387145995
|
|
|
|
key: test_mcc
|
|
value: [0.53150959 0.5 0.53150959 0.51639778 0.23915249 0.55544355
|
|
0.40327957 0.29185862 0.39717742 0.42842742]
|
|
|
|
mean value: 0.43947560209106934
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.765625 0.75 0.765625 0.75 0.61904762 0.77777778
|
|
0.6984127 0.63492063 0.6984127 0.71428571]
|
|
|
|
mean value: 0.7174107142857142
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.76923077 0.75 0.76923077 0.71428571 0.61290323 0.78125
|
|
0.6779661 0.68493151 0.6984127 0.70967742]
|
|
|
|
mean value: 0.7167888204865471
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.75757576 0.75 0.75757576 0.83333333 0.63333333 0.78125
|
|
0.74074074 0.5952381 0.6875 0.70967742]
|
|
|
|
mean value: 0.7246224437151857
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.78125 0.75 0.78125 0.625 0.59375 0.78125
|
|
0.625 0.80645161 0.70967742 0.70967742]
|
|
|
|
mean value: 0.7163306451612903
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.765625 0.75 0.765625 0.75 0.61945565 0.77772177
|
|
0.69959677 0.63760081 0.69858871 0.71421371]
|
|
|
|
mean value: 0.7178427419354838
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.625 0.6 0.625 0.55555556 0.44186047 0.64102564
|
|
0.51282051 0.52083333 0.53658537 0.55 ]
|
|
|
|
mean value: 0.5608680873704981
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.27
|
|
|
|
Accuracy on Blind test: 0.64
|
|
|
|
Model_name: Random Forest
|
|
Model func: RandomForestClassifier(n_estimators=1000, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(n_estimators=1000, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [2.21129918 2.18176174 2.17599201 2.15194917 2.18359351 2.18674564
|
|
2.23126411 2.19483113 2.19053888 2.27243757]
|
|
|
|
mean value: 2.198041296005249
|
|
|
|
key: score_time
|
|
value: [0.10429049 0.09528875 0.09521317 0.09452415 0.1034019 0.09562993
|
|
0.09537983 0.09546041 0.10098195 0.10416818]
|
|
|
|
mean value: 0.09843387603759765
|
|
|
|
key: test_mcc
|
|
value: [0.84416229 0.90669283 0.84416229 0.8125 0.80947581 0.71443023
|
|
0.68352185 0.81130213 0.72407013 0.87462485]
|
|
|
|
mean value: 0.8024942409158446
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.921875 0.953125 0.921875 0.90625 0.9047619 0.85714286
|
|
0.84126984 0.9047619 0.85714286 0.93650794]
|
|
|
|
mean value: 0.9004712301587301
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.92063492 0.95238095 0.92307692 0.90625 0.90625 0.86153846
|
|
0.84848485 0.90625 0.86567164 0.93333333]
|
|
|
|
mean value: 0.9023871081240484
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.93548387 0.96774194 0.90909091 0.90625 0.90625 0.84848485
|
|
0.82352941 0.87878788 0.80555556 0.96551724]
|
|
|
|
mean value: 0.8946691651514821
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.90625 0.9375 0.9375 0.90625 0.90625 0.875
|
|
0.875 0.93548387 0.93548387 0.90322581]
|
|
|
|
mean value: 0.9117943548387096
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.921875 0.953125 0.921875 0.90625 0.9047379 0.85685484
|
|
0.84072581 0.90524194 0.85836694 0.9359879 ]
|
|
|
|
mean value: 0.9005040322580645
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.85294118 0.90909091 0.85714286 0.82857143 0.82857143 0.75675676
|
|
0.73684211 0.82857143 0.76315789 0.875 ]
|
|
|
|
mean value: 0.8236645985175397
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.67
|
|
|
|
Accuracy on Blind test: 0.84
|
|
|
|
Model_name: Random Forest2
|
|
Model func: RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000...05', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10,
|
|
oob_score=True, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.03612447 1.02883792 1.05346131 1.02845502 1.04689741 1.01136518
|
|
1.0421288 1.02813244 1.04972386 1.01992607]
|
|
|
|
mean value: 1.0345052480697632
|
|
|
|
key: score_time
|
|
value: [0.22935534 0.23343563 0.23888326 0.12340879 0.27946091 0.26874781
|
|
0.24961305 0.19223285 0.27546382 0.29060841]
|
|
|
|
mean value: 0.23812098503112794
|
|
|
|
key: test_mcc
|
|
value: [0.84416229 0.90669283 0.8125 0.8125 0.80947581 0.71705182
|
|
0.71705182 0.78160117 0.72407013 0.84484323]
|
|
|
|
mean value: 0.7969949084526066
|
|
|
|
key: train_mcc
|
|
value: [0.94063464 0.92677801 0.92728096 0.9340293 0.93052245 0.93391766
|
|
0.92303445 0.94432589 0.937524 0.92350399]
|
|
|
|
mean value: 0.9321551356771407
|
|
|
|
key: test_accuracy
|
|
value: [0.921875 0.953125 0.90625 0.90625 0.9047619 0.85714286
|
|
0.85714286 0.88888889 0.85714286 0.92063492]
|
|
|
|
mean value: 0.8973214285714286
|
|
|
|
key: train_accuracy
|
|
value: [0.97017544 0.96315789 0.96315789 0.96666667 0.96497373 0.96672504
|
|
0.9614711 0.97197898 0.96847636 0.9614711 ]
|
|
|
|
mean value: 0.9658254216978523
|
|
|
|
key: test_fscore
|
|
value: [0.92063492 0.95238095 0.90625 0.90625 0.90625 0.86567164
|
|
0.86567164 0.89230769 0.86567164 0.91525424]
|
|
|
|
mean value: 0.8996342727984835
|
|
|
|
key: train_fscore
|
|
value: [0.97053726 0.96373057 0.96397942 0.96729776 0.96551724 0.9671848
|
|
0.96167247 0.97241379 0.96907216 0.96219931]
|
|
|
|
mean value: 0.9663604798329994
|
|
|
|
key: test_precision
|
|
value: [0.93548387 0.96774194 0.90625 0.90625 0.90625 0.82857143
|
|
0.82857143 0.85294118 0.80555556 0.96428571]
|
|
|
|
mean value: 0.8901901109906328
|
|
|
|
key: train_precision
|
|
value: [0.95890411 0.94897959 0.94295302 0.94932432 0.94915254 0.95238095
|
|
0.9550173 0.95918367 0.9527027 0.94594595]
|
|
|
|
mean value: 0.9514544163794261
|
|
|
|
key: test_recall
|
|
value: [0.90625 0.9375 0.90625 0.90625 0.90625 0.90625
|
|
0.90625 0.93548387 0.93548387 0.87096774]
|
|
|
|
mean value: 0.9116935483870967
|
|
|
|
key: train_recall
|
|
value: [0.98245614 0.97894737 0.98596491 0.98596491 0.98245614 0.98245614
|
|
0.96842105 0.98601399 0.98601399 0.97902098]
|
|
|
|
mean value: 0.9817715617715618
|
|
|
|
key: test_roc_auc
|
|
value: [0.921875 0.953125 0.90625 0.90625 0.9047379 0.85635081
|
|
0.85635081 0.88961694 0.85836694 0.91985887]
|
|
|
|
mean value: 0.8972782258064516
|
|
|
|
key: train_roc_auc
|
|
value: [0.97017544 0.96315789 0.96315789 0.96666667 0.96500429 0.96675255
|
|
0.96148325 0.97195436 0.96844559 0.96144031]
|
|
|
|
mean value: 0.9658238252975095
|
|
|
|
key: test_jcc
|
|
value: [0.85294118 0.90909091 0.82857143 0.82857143 0.82857143 0.76315789
|
|
0.76315789 0.80555556 0.76315789 0.84375 ]
|
|
|
|
mean value: 0.8186525611041865
|
|
|
|
key: train_jcc
|
|
value: [0.94276094 0.93 0.93046358 0.93666667 0.93333333 0.93645485
|
|
0.9261745 0.94630872 0.94 0.92715232]
|
|
|
|
mean value: 0.9349314907775516
|
|
|
|
MCC on Blind test: 0.68
|
|
|
|
Accuracy on Blind test: 0.84
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01233745 0.01187658 0.01076055 0.01092172 0.01135612 0.01156545
|
|
0.01084685 0.01140285 0.01177788 0.01220751]
|
|
|
|
mean value: 0.01150529384613037
|
|
|
|
key: score_time
|
|
value: [0.01918459 0.00934148 0.00909972 0.00989866 0.0099771 0.00917387
|
|
0.00946617 0.01000452 0.00948906 0.01004934]
|
|
|
|
mean value: 0.010568451881408692
|
|
|
|
key: test_mcc
|
|
value: [0.5625 0.438357 0.2214702 0.56360186 0.53549564 0.61895161
|
|
0.49493401 0.34405576 0.48255984 0.4307759 ]
|
|
|
|
mean value: 0.46927018296024287
|
|
|
|
key: train_mcc
|
|
value: [0.53512128 0.56101149 0.55927353 0.54447222 0.5466585 0.53152779
|
|
0.56477321 0.53613782 0.54097122 0.54351524]
|
|
|
|
mean value: 0.5463462303428195
|
|
|
|
key: test_accuracy
|
|
value: [0.78125 0.71875 0.609375 0.78125 0.76190476 0.80952381
|
|
0.74603175 0.66666667 0.73015873 0.71428571]
|
|
|
|
mean value: 0.7319196428571428
|
|
|
|
key: train_accuracy
|
|
value: [0.76491228 0.77894737 0.77894737 0.77017544 0.77232925 0.76532399
|
|
0.78108581 0.76707531 0.76882662 0.77057793]
|
|
|
|
mean value: 0.771820137032599
|
|
|
|
key: test_fscore
|
|
value: [0.78125 0.72727273 0.57627119 0.78787879 0.78873239 0.8125
|
|
0.76470588 0.69565217 0.76056338 0.68965517]
|
|
|
|
mean value: 0.7384481704919857
|
|
|
|
key: train_fscore
|
|
value: [0.78032787 0.79 0.78644068 0.78347107 0.78114478 0.77133106
|
|
0.79061977 0.77721943 0.78145695 0.78130217]
|
|
|
|
mean value: 0.7823313780270075
|
|
|
|
key: test_precision
|
|
value: [0.78125 0.70588235 0.62962963 0.76470588 0.71794872 0.8125
|
|
0.72222222 0.63157895 0.675 0.74074074]
|
|
|
|
mean value: 0.7181458493203849
|
|
|
|
key: train_precision
|
|
value: [0.73230769 0.75238095 0.76065574 0.740625 0.75080906 0.75083056
|
|
0.75641026 0.74598071 0.74213836 0.74760383]
|
|
|
|
mean value: 0.7479742171117733
|
|
|
|
key: test_recall
|
|
value: [0.78125 0.75 0.53125 0.8125 0.875 0.8125
|
|
0.8125 0.77419355 0.87096774 0.64516129]
|
|
|
|
mean value: 0.7665322580645161
|
|
|
|
key: train_recall
|
|
value: [0.83508772 0.83157895 0.81403509 0.83157895 0.81403509 0.79298246
|
|
0.82807018 0.81118881 0.82517483 0.81818182]
|
|
|
|
mean value: 0.8201913875598086
|
|
|
|
key: test_roc_auc
|
|
value: [0.78125 0.71875 0.609375 0.78125 0.76008065 0.80947581
|
|
0.74495968 0.66834677 0.73235887 0.71320565]
|
|
|
|
mean value: 0.7319052419354839
|
|
|
|
key: train_roc_auc
|
|
value: [0.76491228 0.77894737 0.77894737 0.77017544 0.77240216 0.76537235
|
|
0.78116795 0.76699791 0.76872776 0.77049442]
|
|
|
|
mean value: 0.7718145012881855
|
|
|
|
key: test_jcc
|
|
value: [0.64102564 0.57142857 0.4047619 0.65 0.65116279 0.68421053
|
|
0.61904762 0.53333333 0.61363636 0.52631579]
|
|
|
|
mean value: 0.5894922539720582
|
|
|
|
key: train_jcc
|
|
value: [0.63978495 0.65289256 0.64804469 0.64402174 0.64088398 0.62777778
|
|
0.65373961 0.63561644 0.64130435 0.64109589]
|
|
|
|
mean value: 0.6425161984547801
|
|
|
|
MCC on Blind test: 0.36
|
|
|
|
Accuracy on Blind test: 0.68
|
|
|
|
Model_name: XGBoost
|
|
Model func: XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000...
|
|
interaction_constraints=None, learning_rate=None,
|
|
max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan,
|
|
monotone_constraints=None, n_estimators=100,
|
|
n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None,
|
|
reg_lambda=None, scale_pos_weight=None,
|
|
subsample=None, tree_method=None,
|
|
use_label_encoder=False,
|
|
validate_parameters=None, verbosity=0))])
|
|
|
|
key: fit_time
|
|
value: [0.13100171 0.1108017 0.11466289 0.1149056 0.11569571 0.11424947
|
|
0.1059444 0.10726643 0.10647297 0.10821867]
|
|
|
|
mean value: 0.11292195320129395
|
|
|
|
key: score_time
|
|
value: [0.01161242 0.01174259 0.01199245 0.01133132 0.012321 0.01231289
|
|
0.011199 0.01135135 0.01112986 0.01113558]
|
|
|
|
mean value: 0.011612844467163087
|
|
|
|
key: test_mcc
|
|
value: [0.84416229 0.9375 0.84416229 0.8125 0.90524194 0.71705182
|
|
0.71443023 0.81130213 0.72407013 0.96875 ]
|
|
|
|
mean value: 0.8279170821280594
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.921875 0.96875 0.921875 0.90625 0.95238095 0.85714286
|
|
0.85714286 0.9047619 0.85714286 0.98412698]
|
|
|
|
mean value: 0.9131448412698413
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.92307692 0.96875 0.92063492 0.90625 0.95238095 0.86567164
|
|
0.86153846 0.90625 0.86567164 0.98412698]
|
|
|
|
mean value: 0.9154351525340331
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.90909091 0.96875 0.93548387 0.90625 0.96774194 0.82857143
|
|
0.84848485 0.87878788 0.80555556 0.96875 ]
|
|
|
|
mean value: 0.9017466426942233
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.9375 0.96875 0.90625 0.90625 0.9375 0.90625
|
|
0.875 0.93548387 0.93548387 1. ]
|
|
|
|
mean value: 0.9308467741935483
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.921875 0.96875 0.921875 0.90625 0.95262097 0.85635081
|
|
0.85685484 0.90524194 0.85836694 0.984375 ]
|
|
|
|
mean value: 0.9132560483870967
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.85714286 0.93939394 0.85294118 0.82857143 0.90909091 0.76315789
|
|
0.75675676 0.82857143 0.76315789 0.96875 ]
|
|
|
|
mean value: 0.8467534285471592
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.73
|
|
|
|
Accuracy on Blind test: 0.87
|
|
|
|
Model_name: LDA
|
|
Model func: LinearDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LinearDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.04637885 0.08834767 0.06918025 0.05507755 0.08053446 0.07429886
|
|
0.04650426 0.07581925 0.07261181 0.05177426]
|
|
|
|
mean value: 0.0660527229309082
|
|
|
|
key: score_time
|
|
value: [0.02093959 0.01971197 0.01240396 0.01932096 0.01950669 0.01235509
|
|
0.01232791 0.01954722 0.01231313 0.01326156]
|
|
|
|
mean value: 0.016168808937072753
|
|
|
|
key: test_mcc
|
|
value: [0.56360186 0.64549722 0.40644851 0.76354172 0.69429215 0.61982085
|
|
0.71705182 0.60364273 0.57596915 0.58728587]
|
|
|
|
mean value: 0.6177151884770942
|
|
|
|
key: train_mcc
|
|
value: [0.84327404 0.82219219 0.84694977 0.84327404 0.85002782 0.85365432
|
|
0.82953205 0.82950084 0.8372133 0.82614956]
|
|
|
|
mean value: 0.8381767933116789
|
|
|
|
key: test_accuracy
|
|
value: [0.78125 0.8125 0.703125 0.875 0.84126984 0.80952381
|
|
0.85714286 0.79365079 0.77777778 0.79365079]
|
|
|
|
mean value: 0.8044890873015873
|
|
|
|
key: train_accuracy
|
|
value: [0.92105263 0.91052632 0.92280702 0.92105263 0.92469352 0.92644483
|
|
0.91418564 0.91418564 0.91768827 0.91243433]
|
|
|
|
mean value: 0.9185070820659355
|
|
|
|
key: test_fscore
|
|
value: [0.78787879 0.83333333 0.6984127 0.86206897 0.85714286 0.81818182
|
|
0.86567164 0.8115942 0.8 0.78688525]
|
|
|
|
mean value: 0.8121169551057972
|
|
|
|
key: train_fscore
|
|
value: [0.92307692 0.91282051 0.92491468 0.92307692 0.92598967 0.92783505
|
|
0.91623932 0.9165247 0.92047377 0.91496599]
|
|
|
|
mean value: 0.9205917537039755
|
|
|
|
key: test_precision
|
|
value: [0.76470588 0.75 0.70967742 0.96153846 0.78947368 0.79411765
|
|
0.82857143 0.73684211 0.71794872 0.8 ]
|
|
|
|
mean value: 0.7852875346298895
|
|
|
|
key: train_precision
|
|
value: [0.9 0.89 0.90033223 0.9 0.90878378 0.90909091
|
|
0.89333333 0.89368771 0.89180328 0.89072848]
|
|
|
|
mean value: 0.897775971527256
|
|
|
|
key: test_recall
|
|
value: [0.8125 0.9375 0.6875 0.78125 0.9375 0.84375
|
|
0.90625 0.90322581 0.90322581 0.77419355]
|
|
|
|
mean value: 0.8486895161290322
|
|
|
|
key: train_recall
|
|
value: [0.94736842 0.93684211 0.95087719 0.94736842 0.94385965 0.94736842
|
|
0.94035088 0.94055944 0.95104895 0.94055944]
|
|
|
|
mean value: 0.944620291988713
|
|
|
|
key: test_roc_auc
|
|
value: [0.78125 0.8125 0.703125 0.875 0.83971774 0.80897177
|
|
0.85635081 0.7953629 0.7797379 0.79334677]
|
|
|
|
mean value: 0.8045362903225807
|
|
|
|
key: train_roc_auc
|
|
value: [0.92105263 0.91052632 0.92280702 0.92105263 0.92472703 0.92648141
|
|
0.91423138 0.91413937 0.91762974 0.91238498]
|
|
|
|
mean value: 0.91850325113483
|
|
|
|
key: test_jcc
|
|
value: [0.65 0.71428571 0.53658537 0.75757576 0.75 0.69230769
|
|
0.76315789 0.68292683 0.66666667 0.64864865]
|
|
|
|
mean value: 0.6862154569343273
|
|
|
|
key: train_jcc
|
|
value: [0.85714286 0.83962264 0.86031746 0.85714286 0.86217949 0.86538462
|
|
0.84542587 0.84591195 0.85266458 0.84326019]
|
|
|
|
mean value: 0.8529052500760415
|
|
|
|
MCC on Blind test: 0.39
|
|
|
|
Accuracy on Blind test: 0.7
|
|
|
|
Model_name: Multinomial
|
|
Model func: MultinomialNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MultinomialNB())])
|
|
|
|
key: fit_time
|
|
value: [0.02883339 0.01046467 0.01015687 0.00999999 0.01025605 0.01001477
|
|
0.01010776 0.01026273 0.0100286 0.01012015]
|
|
|
|
mean value: 0.012024497985839844
|
|
|
|
key: score_time
|
|
value: [0.0092907 0.00902653 0.00880337 0.00878048 0.0087707 0.00881338
|
|
0.00873876 0.00882483 0.00882506 0.008744 ]
|
|
|
|
mean value: 0.008861780166625977
|
|
|
|
key: test_mcc
|
|
value: [0.51639778 0.5336001 0.50097943 0.68884672 0.60087592 0.59372402
|
|
0.59372402 0.43812738 0.48255984 0.56449867]
|
|
|
|
mean value: 0.5513333888290408
|
|
|
|
key: train_mcc
|
|
value: [0.58176182 0.61191897 0.58742755 0.57887321 0.60786984 0.56185823
|
|
0.56981689 0.61476947 0.615254 0.61631894]
|
|
|
|
mean value: 0.5945868902542161
|
|
|
|
key: test_accuracy
|
|
value: [0.75 0.765625 0.75 0.84375 0.79365079 0.79365079
|
|
0.79365079 0.71428571 0.73015873 0.77777778]
|
|
|
|
mean value: 0.7712549603174603
|
|
|
|
key: train_accuracy
|
|
value: [0.78947368 0.80350877 0.79122807 0.7877193 0.80210158 0.7793345
|
|
0.78283713 0.8056042 0.8056042 0.8056042 ]
|
|
|
|
mean value: 0.7953015638922174
|
|
|
|
key: test_fscore
|
|
value: [0.77777778 0.7761194 0.74193548 0.84848485 0.81690141 0.8115942
|
|
0.8115942 0.73529412 0.76056338 0.75 ]
|
|
|
|
mean value: 0.7830264825295223
|
|
|
|
key: train_fscore
|
|
value: [0.7993311 0.81518152 0.80395387 0.79866889 0.81198003 0.79
|
|
0.79470199 0.8159204 0.81652893 0.81773399]
|
|
|
|
mean value: 0.8064000712331675
|
|
|
|
key: test_precision
|
|
value: [0.7 0.74285714 0.76666667 0.82352941 0.74358974 0.75675676
|
|
0.75675676 0.67567568 0.675 0.84 ]
|
|
|
|
mean value: 0.7480832154067448
|
|
|
|
key: train_precision
|
|
value: [0.76357827 0.7694704 0.75776398 0.75949367 0.7721519 0.75238095
|
|
0.7523511 0.77602524 0.77429467 0.77089783]
|
|
|
|
mean value: 0.7648408014336768
|
|
|
|
key: test_recall
|
|
value: [0.875 0.8125 0.71875 0.875 0.90625 0.875
|
|
0.875 0.80645161 0.87096774 0.67741935]
|
|
|
|
mean value: 0.8292338709677419
|
|
|
|
key: train_recall
|
|
value: [0.83859649 0.86666667 0.85614035 0.84210526 0.85614035 0.83157895
|
|
0.84210526 0.86013986 0.86363636 0.87062937]
|
|
|
|
mean value: 0.8527738927738928
|
|
|
|
key: test_roc_auc
|
|
value: [0.75 0.765625 0.75 0.84375 0.79183468 0.79233871
|
|
0.79233871 0.71572581 0.73235887 0.77620968]
|
|
|
|
mean value: 0.7710181451612903
|
|
|
|
key: train_roc_auc
|
|
value: [0.78947368 0.80350877 0.79122807 0.7877193 0.80219605 0.77942584
|
|
0.78294074 0.80550853 0.80550239 0.80549012]
|
|
|
|
mean value: 0.795299349773034
|
|
|
|
key: test_jcc
|
|
value: [0.63636364 0.63414634 0.58974359 0.73684211 0.69047619 0.68292683
|
|
0.68292683 0.58139535 0.61363636 0.6 ]
|
|
|
|
mean value: 0.6448457234320147
|
|
|
|
key: train_jcc
|
|
value: [0.66573816 0.68802228 0.67217631 0.66481994 0.68347339 0.65289256
|
|
0.65934066 0.68907563 0.68994413 0.69166667]
|
|
|
|
mean value: 0.6757149740497587
|
|
|
|
MCC on Blind test: 0.4
|
|
|
|
Accuracy on Blind test: 0.7
|
|
|
|
Model_name: Passive Aggresive
|
|
Model func: PassiveAggressiveClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
PassiveAggressiveClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01466751 0.02034426 0.02604008 0.02144074 0.02184224 0.02580714
|
|
0.02606153 0.02597189 0.02331066 0.02317643]
|
|
|
|
mean value: 0.022866249084472656
|
|
|
|
key: score_time
|
|
value: [0.01097655 0.01114917 0.01177526 0.01173902 0.01183462 0.01193428
|
|
0.01192403 0.01199174 0.01196337 0.01206636]
|
|
|
|
mean value: 0.01173543930053711
|
|
|
|
key: test_mcc
|
|
value: [0.52915026 0.62622429 0.6011334 0.75592895 0.57258185 0.55611985
|
|
0.39842149 0.50132936 0.53159579 0.55611985]
|
|
|
|
mean value: 0.5628605087006604
|
|
|
|
key: train_mcc
|
|
value: [0.48845623 0.73720978 0.75400915 0.73036878 0.72229646 0.73423379
|
|
0.61275359 0.6925491 0.7308577 0.72561169]
|
|
|
|
mean value: 0.6928346269749263
|
|
|
|
key: test_accuracy
|
|
value: [0.71875 0.8125 0.796875 0.875 0.77777778 0.77777778
|
|
0.68253968 0.71428571 0.76190476 0.77777778]
|
|
|
|
mean value: 0.7695188492063492
|
|
|
|
key: train_accuracy
|
|
value: [0.69473684 0.86842105 0.87017544 0.86491228 0.85639229 0.86690018
|
|
0.78283713 0.82661996 0.86514886 0.86164623]
|
|
|
|
mean value: 0.8357790272528959
|
|
|
|
key: test_fscore
|
|
value: [0.7804878 0.80645161 0.8115942 0.86666667 0.80555556 0.78787879
|
|
0.61538462 0.76923077 0.7761194 0.76666667]
|
|
|
|
mean value: 0.7786036085047962
|
|
|
|
key: train_fscore
|
|
value: [0.76549865 0.87046632 0.88141026 0.86225403 0.86688312 0.86428571
|
|
0.73043478 0.85157421 0.8627451 0.86722689]
|
|
|
|
mean value: 0.8422779070456206
|
|
|
|
key: test_precision
|
|
value: [0.64 0.83333333 0.75675676 0.92857143 0.725 0.76470588
|
|
0.8 0.63829787 0.72222222 0.79310345]
|
|
|
|
mean value: 0.760199094385297
|
|
|
|
key: train_precision
|
|
value: [0.6214442 0.85714286 0.81120944 0.87956204 0.80664653 0.88
|
|
0.96 0.74540682 0.88 0.83495146]
|
|
|
|
mean value: 0.8276363347916831
|
|
|
|
key: test_recall
|
|
value: [1. 0.78125 0.875 0.8125 0.90625 0.8125
|
|
0.5 0.96774194 0.83870968 0.74193548]
|
|
|
|
mean value: 0.8235887096774194
|
|
|
|
key: train_recall
|
|
value: [0.99649123 0.88421053 0.96491228 0.84561404 0.93684211 0.84912281
|
|
0.58947368 0.99300699 0.84615385 0.9020979 ]
|
|
|
|
mean value: 0.8807925407925408
|
|
|
|
key: test_roc_auc
|
|
value: [0.71875 0.8125 0.796875 0.875 0.77570565 0.77721774
|
|
0.68548387 0.71824597 0.76310484 0.77721774]
|
|
|
|
mean value: 0.7700100806451613
|
|
|
|
key: train_roc_auc
|
|
value: [0.69473684 0.86842105 0.87017544 0.86491228 0.85653294 0.8668691
|
|
0.78249908 0.82632806 0.86518219 0.86157527]
|
|
|
|
mean value: 0.8357232241442768
|
|
|
|
key: test_jcc
|
|
value: [0.64 0.67567568 0.68292683 0.76470588 0.6744186 0.65
|
|
0.44444444 0.625 0.63414634 0.62162162]
|
|
|
|
mean value: 0.6412939399477553
|
|
|
|
key: train_jcc
|
|
value: [0.62008734 0.7706422 0.78796562 0.75786164 0.76504298 0.76100629
|
|
0.57534247 0.74151436 0.75862069 0.76557864]
|
|
|
|
mean value: 0.7303662209332994
|
|
|
|
MCC on Blind test: 0.45
|
|
|
|
Accuracy on Blind test: 0.73
|
|
|
|
Model_name: Stochastic GDescent
|
|
Model func: SGDClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SGDClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02438712 0.02357697 0.02358818 0.03072786 0.02425742 0.02381968
|
|
0.05035043 0.02409315 0.01967001 0.02207899]
|
|
|
|
mean value: 0.026654982566833497
|
|
|
|
key: score_time
|
|
value: [0.01202822 0.01200724 0.01198888 0.01192451 0.01296234 0.01313114
|
|
0.01228142 0.01198983 0.01198268 0.01196551]
|
|
|
|
mean value: 0.012226176261901856
|
|
|
|
key: test_mcc
|
|
value: [0.43033148 0.63228041 0.5378562 0.71910121 0.52928314 0.61982085
|
|
0.78719616 0.45528691 0.4969666 0.68865372]
|
|
|
|
mean value: 0.589677668921627
|
|
|
|
key: train_mcc
|
|
value: [0.45769586 0.7525536 0.70906282 0.76491699 0.67239075 0.79105503
|
|
0.78723304 0.70209276 0.72529147 0.71320874]
|
|
|
|
mean value: 0.7075501065137321
|
|
|
|
key: test_accuracy
|
|
value: [0.65625 0.796875 0.765625 0.859375 0.74603175 0.80952381
|
|
0.88888889 0.6984127 0.74603175 0.84126984]
|
|
|
|
mean value: 0.780828373015873
|
|
|
|
key: train_accuracy
|
|
value: [0.6754386 0.86666667 0.84736842 0.88245614 0.81611208 0.89316988
|
|
0.89141856 0.83362522 0.86164623 0.84238179]
|
|
|
|
mean value: 0.8410283589885397
|
|
|
|
key: test_fscore
|
|
value: [0.74418605 0.82666667 0.7826087 0.85714286 0.78947368 0.81818182
|
|
0.89855072 0.75324675 0.75757576 0.84848485]
|
|
|
|
mean value: 0.807611785231071
|
|
|
|
key: train_fscore
|
|
value: [0.75431607 0.88012618 0.86124402 0.88224956 0.84257871 0.8985025
|
|
0.89666667 0.85627837 0.85662432 0.86196319]
|
|
|
|
mean value: 0.8590549580660698
|
|
|
|
key: test_precision
|
|
value: [0.59259259 0.72093023 0.72972973 0.87096774 0.68181818 0.79411765
|
|
0.83783784 0.63043478 0.71428571 0.8 ]
|
|
|
|
mean value: 0.7372714460425198
|
|
|
|
key: train_precision
|
|
value: [0.60683761 0.79942693 0.78947368 0.88380282 0.73560209 0.85443038
|
|
0.85396825 0.75466667 0.89056604 0.76775956]
|
|
|
|
mean value: 0.7936534037246936
|
|
|
|
key: test_recall
|
|
value: [1. 0.96875 0.84375 0.84375 0.9375 0.84375
|
|
0.96875 0.93548387 0.80645161 0.90322581]
|
|
|
|
mean value: 0.9051411290322581
|
|
|
|
key: train_recall
|
|
value: [0.99649123 0.97894737 0.94736842 0.88070175 0.98596491 0.94736842
|
|
0.94385965 0.98951049 0.82517483 0.98251748]
|
|
|
|
mean value: 0.9477904551588762
|
|
|
|
key: test_roc_auc
|
|
value: [0.65625 0.796875 0.765625 0.859375 0.74294355 0.80897177
|
|
0.88760081 0.70211694 0.74697581 0.8422379 ]
|
|
|
|
mean value: 0.7808971774193548
|
|
|
|
key: train_roc_auc
|
|
value: [0.6754386 0.86666667 0.84736842 0.88245614 0.81640903 0.89326463
|
|
0.89151024 0.83335174 0.86171022 0.84213593]
|
|
|
|
mean value: 0.8410311618206355
|
|
|
|
key: test_jcc
|
|
value: [0.59259259 0.70454545 0.64285714 0.75 0.65217391 0.69230769
|
|
0.81578947 0.60416667 0.6097561 0.73684211]
|
|
|
|
mean value: 0.6801031138521372
|
|
|
|
key: train_jcc
|
|
value: [0.60554371 0.78591549 0.75630252 0.78930818 0.72797927 0.81570997
|
|
0.81268882 0.74867725 0.74920635 0.7574124 ]
|
|
|
|
mean value: 0.7548743963045716
|
|
|
|
MCC on Blind test: 0.38
|
|
|
|
Accuracy on Blind test: 0.7
|
|
|
|
Model_name: AdaBoost Classifier
|
|
Model func: AdaBoostClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', AdaBoostClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.22404265 0.20949054 0.21080303 0.2099843 0.21042252 0.20934153
|
|
0.20883155 0.21123242 0.21139431 0.21479511]
|
|
|
|
mean value: 0.2120337963104248
|
|
|
|
key: score_time
|
|
value: [0.01565266 0.0155592 0.015872 0.01542377 0.01540327 0.0154717
|
|
0.01560116 0.01561856 0.01600695 0.01574636]
|
|
|
|
mean value: 0.0156355619430542
|
|
|
|
key: test_mcc
|
|
value: [0.87671401 0.84416229 0.75 0.84416229 0.84530217 0.68352185
|
|
0.71443023 0.68865372 0.76058095 0.87487431]
|
|
|
|
mean value: 0.7882401819986872
|
|
|
|
key: train_mcc
|
|
value: [0.95791832 0.93338505 0.9754446 0.94751425 0.96862577 0.95451924
|
|
0.96497362 0.96152336 0.96852915 0.94398027]
|
|
|
|
mean value: 0.9576413651109649
|
|
|
|
key: test_accuracy
|
|
value: [0.9375 0.921875 0.875 0.921875 0.92063492 0.84126984
|
|
0.85714286 0.84126984 0.87301587 0.93650794]
|
|
|
|
mean value: 0.892609126984127
|
|
|
|
key: train_accuracy
|
|
value: [0.97894737 0.96666667 0.9877193 0.97368421 0.98423818 0.97723292
|
|
0.98248687 0.98073555 0.98423818 0.97197898]
|
|
|
|
mean value: 0.9787928226871908
|
|
|
|
key: test_fscore
|
|
value: [0.93939394 0.92063492 0.875 0.92063492 0.91803279 0.84848485
|
|
0.86153846 0.84848485 0.88235294 0.9375 ]
|
|
|
|
mean value: 0.8952057667233655
|
|
|
|
key: train_fscore
|
|
value: [0.97902098 0.96684119 0.98769772 0.97391304 0.98434783 0.97731239
|
|
0.98245614 0.98086957 0.98434783 0.97212544]
|
|
|
|
mean value: 0.9788932108732904
|
|
|
|
key: test_precision
|
|
value: [0.91176471 0.93548387 0.875 0.93548387 0.96551724 0.82352941
|
|
0.84848485 0.8 0.81081081 0.90909091]
|
|
|
|
mean value: 0.8815165669348421
|
|
|
|
key: train_precision
|
|
value: [0.97560976 0.96180556 0.98943662 0.96551724 0.97586207 0.97222222
|
|
0.98245614 0.97577855 0.97923875 0.96875 ]
|
|
|
|
mean value: 0.9746676905327416
|
|
|
|
key: test_recall
|
|
value: [0.96875 0.90625 0.875 0.90625 0.875 0.875
|
|
0.875 0.90322581 0.96774194 0.96774194]
|
|
|
|
mean value: 0.9119959677419355
|
|
|
|
key: train_recall
|
|
value: [0.98245614 0.97192982 0.98596491 0.98245614 0.99298246 0.98245614
|
|
0.98245614 0.98601399 0.98951049 0.97552448]
|
|
|
|
mean value: 0.9831750705434916
|
|
|
|
key: test_roc_auc
|
|
value: [0.9375 0.921875 0.875 0.921875 0.92137097 0.84072581
|
|
0.85685484 0.8422379 0.87449597 0.93699597]
|
|
|
|
mean value: 0.8928931451612904
|
|
|
|
key: train_roc_auc
|
|
value: [0.97894737 0.96666667 0.9877193 0.97368421 0.98425347 0.97724206
|
|
0.98248681 0.98072629 0.98422893 0.97197276]
|
|
|
|
mean value: 0.9787927861612072
|
|
|
|
key: test_jcc
|
|
value: [0.88571429 0.85294118 0.77777778 0.85294118 0.84848485 0.73684211
|
|
0.75675676 0.73684211 0.78947368 0.88235294]
|
|
|
|
mean value: 0.8120126857588158
|
|
|
|
key: train_jcc
|
|
value: [0.95890411 0.93581081 0.97569444 0.94915254 0.96917808 0.9556314
|
|
0.96551724 0.96245734 0.96917808 0.94576271]
|
|
|
|
mean value: 0.9587286762045821
|
|
|
|
MCC on Blind test: 0.71
|
|
|
|
Accuracy on Blind test: 0.86
|
|
|
|
Model_name: Bagging Classifier
|
|
Model func: BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
BaggingClassifier(n_jobs=10, oob_score=True,
|
|
random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.09277225 0.11120796 0.10793185 0.10711575 0.11093473 0.12785196
|
|
0.11650276 0.08445287 0.10883117 0.11838365]
|
|
|
|
mean value: 0.10859849452972412
|
|
|
|
key: score_time
|
|
value: [0.02065492 0.03160429 0.0220325 0.02657008 0.0324316 0.03746438
|
|
0.04351306 0.02652216 0.03911519 0.03521681]
|
|
|
|
mean value: 0.03151249885559082
|
|
|
|
key: test_mcc
|
|
value: [0.78163175 0.87671401 0.78163175 0.84416229 0.84530217 0.72270545
|
|
0.78094752 0.77822581 0.72407013 0.96875 ]
|
|
|
|
mean value: 0.8104140873134662
|
|
|
|
key: train_mcc
|
|
value: [0.96857012 0.98246219 0.98947978 0.99300691 0.98954653 0.98601347
|
|
0.98601347 0.97548767 0.9757759 0.98254138]
|
|
|
|
mean value: 0.9828897414834783
|
|
|
|
key: test_accuracy
|
|
value: [0.890625 0.9375 0.890625 0.921875 0.92063492 0.85714286
|
|
0.88888889 0.88888889 0.85714286 0.98412698]
|
|
|
|
mean value: 0.9037450396825396
|
|
|
|
key: train_accuracy
|
|
value: [0.98421053 0.99122807 0.99473684 0.99649123 0.99474606 0.99299475
|
|
0.99299475 0.98774081 0.98774081 0.99124343]
|
|
|
|
mean value: 0.9914127262113251
|
|
|
|
key: test_fscore
|
|
value: [0.88888889 0.93548387 0.89230769 0.92307692 0.91803279 0.86956522
|
|
0.89552239 0.88888889 0.86567164 0.98412698]
|
|
|
|
mean value: 0.9061565282384415
|
|
|
|
key: train_fscore
|
|
value: [0.9840708 0.99124343 0.99472759 0.99647887 0.99470899 0.99295775
|
|
0.99295775 0.98774081 0.98761062 0.99121265]
|
|
|
|
mean value: 0.991370926105971
|
|
|
|
key: test_precision
|
|
value: [0.90322581 0.96666667 0.87878788 0.90909091 0.96551724 0.81081081
|
|
0.85714286 0.875 0.80555556 0.96875 ]
|
|
|
|
mean value: 0.8940547725885601
|
|
|
|
key: train_precision
|
|
value: [0.99285714 0.98951049 0.99647887 1. 1. 0.99646643
|
|
0.99646643 0.98947368 1. 0.99646643]
|
|
|
|
mean value: 0.9957719483103814
|
|
|
|
key: test_recall
|
|
value: [0.875 0.90625 0.90625 0.9375 0.875 0.9375
|
|
0.9375 0.90322581 0.93548387 1. ]
|
|
|
|
mean value: 0.9213709677419355
|
|
|
|
key: train_recall
|
|
value: [0.9754386 0.99298246 0.99298246 0.99298246 0.98947368 0.98947368
|
|
0.98947368 0.98601399 0.97552448 0.98601399]
|
|
|
|
mean value: 0.9870359465096307
|
|
|
|
key: test_roc_auc
|
|
value: [0.890625 0.9375 0.890625 0.921875 0.92137097 0.85584677
|
|
0.88810484 0.8891129 0.85836694 0.984375 ]
|
|
|
|
mean value: 0.9037802419354839
|
|
|
|
key: train_roc_auc
|
|
value: [0.98421053 0.99122807 0.99473684 0.99649123 0.99473684 0.99298859
|
|
0.99298859 0.98774384 0.98776224 0.99125261]
|
|
|
|
mean value: 0.9914139369402527
|
|
|
|
key: test_jcc
|
|
value: [0.8 0.87878788 0.80555556 0.85714286 0.84848485 0.76923077
|
|
0.81081081 0.8 0.76315789 0.96875 ]
|
|
|
|
mean value: 0.8301920614749563
|
|
|
|
key: train_jcc
|
|
value: [0.96864111 0.98263889 0.98951049 0.99298246 0.98947368 0.98601399
|
|
0.98601399 0.97577855 0.97552448 0.9825784 ]
|
|
|
|
mean value: 0.9829156025210628
|
|
|
|
MCC on Blind test: 0.61
|
|
|
|
Accuracy on Blind test: 0.81
|
|
|
|
Model_name: Gaussian Process
|
|
Model func: GaussianProcessClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianProcessClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.21769905 0.2195487 0.23250198 0.23222423 0.26852727 0.24305701
|
|
0.23453808 0.23407316 0.23557377 0.23404908]
|
|
|
|
mean value: 0.23517923355102538
|
|
|
|
key: score_time
|
|
value: [0.02685475 0.02689195 0.02682996 0.0266974 0.02714157 0.02700257
|
|
0.02708054 0.02719736 0.02715063 0.02718496]
|
|
|
|
mean value: 0.027003169059753418
|
|
|
|
key: test_mcc
|
|
value: [0.76354172 0.71910121 0.5336001 0.56360186 0.46146899 0.42871785
|
|
0.49193548 0.3798283 0.43923912 0.58770161]
|
|
|
|
mean value: 0.5368736246899668
|
|
|
|
key: train_mcc
|
|
value: [0.95486219 0.95848494 0.93741933 0.94105208 0.94115791 0.94816792
|
|
0.95154989 0.94115006 0.93777673 0.93051399]
|
|
|
|
mean value: 0.9442135037881351
|
|
|
|
key: test_accuracy
|
|
value: [0.875 0.859375 0.765625 0.78125 0.73015873 0.71428571
|
|
0.74603175 0.68253968 0.6984127 0.79365079]
|
|
|
|
mean value: 0.7646329365079365
|
|
|
|
key: train_accuracy
|
|
value: [0.97719298 0.97894737 0.96842105 0.97017544 0.97022767 0.9737303
|
|
0.97548161 0.97022767 0.96847636 0.96497373]
|
|
|
|
mean value: 0.9717854180108766
|
|
|
|
key: test_fscore
|
|
value: [0.88571429 0.86153846 0.7761194 0.77419355 0.74626866 0.72727273
|
|
0.75 0.71428571 0.74666667 0.79365079]
|
|
|
|
mean value: 0.7775710257217239
|
|
|
|
key: train_fscore
|
|
value: [0.9775475 0.97931034 0.96896552 0.9707401 0.9707401 0.97418244
|
|
0.97586207 0.97084048 0.96917808 0.96563574]
|
|
|
|
mean value: 0.9723002378616942
|
|
|
|
key: test_precision
|
|
value: [0.81578947 0.84848485 0.74285714 0.8 0.71428571 0.70588235
|
|
0.75 0.64102564 0.63636364 0.78125 ]
|
|
|
|
mean value: 0.7435938809642371
|
|
|
|
key: train_precision
|
|
value: [0.96258503 0.96271186 0.95254237 0.9527027 0.9527027 0.95608108
|
|
0.95932203 0.95286195 0.94966443 0.94932432]
|
|
|
|
mean value: 0.9550498498403011
|
|
|
|
key: test_recall
|
|
value: [0.96875 0.875 0.8125 0.75 0.78125 0.75
|
|
0.75 0.80645161 0.90322581 0.80645161]
|
|
|
|
mean value: 0.8203629032258064
|
|
|
|
key: train_recall
|
|
value: [0.99298246 0.99649123 0.98596491 0.98947368 0.98947368 0.99298246
|
|
0.99298246 0.98951049 0.98951049 0.98251748]
|
|
|
|
mean value: 0.9901889338731443
|
|
|
|
key: test_roc_auc
|
|
value: [0.875 0.859375 0.765625 0.78125 0.72933468 0.71370968
|
|
0.74596774 0.68447581 0.7016129 0.79385081]
|
|
|
|
mean value: 0.7650201612903226
|
|
|
|
key: train_roc_auc
|
|
value: [0.97719298 0.97894737 0.96842105 0.97017544 0.97026132 0.97376396
|
|
0.97551221 0.97019384 0.96843946 0.96494295]
|
|
|
|
mean value: 0.9717850570482149
|
|
|
|
key: test_jcc
|
|
value: [0.79487179 0.75675676 0.63414634 0.63157895 0.5952381 0.57142857
|
|
0.6 0.55555556 0.59574468 0.65789474]
|
|
|
|
mean value: 0.6393215480375779
|
|
|
|
key: train_jcc
|
|
value: [0.95608108 0.95945946 0.93979933 0.94314381 0.94314381 0.94966443
|
|
0.95286195 0.94333333 0.94019934 0.93355482]
|
|
|
|
mean value: 0.9461241365611688
|
|
|
|
MCC on Blind test: 0.36
|
|
|
|
Accuracy on Blind test: 0.69
|
|
|
|
Model_name: Gradient Boosting
|
|
Model func: GradientBoostingClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GradientBoostingClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.89135027 0.88410759 0.87596011 0.88601875 0.88395333 0.88206983
|
|
0.87664676 0.90293217 0.88312197 0.88222742]
|
|
|
|
mean value: 0.8848388195037842
|
|
|
|
key: score_time
|
|
value: [0.00947404 0.00941205 0.00930047 0.00934148 0.00985909 0.00946188
|
|
0.0094347 0.00927377 0.00946712 0.00947332]
|
|
|
|
mean value: 0.00944979190826416
|
|
|
|
key: test_mcc
|
|
value: [0.84416229 0.875 0.78163175 0.78163175 0.87487431 0.78094752
|
|
0.74596774 0.77822581 0.72407013 0.93649194]
|
|
|
|
mean value: 0.8123003234581166
|
|
|
|
key: train_mcc
|
|
value: [0.99298246 1. 1. 0.99649736 0.99301901 0.99650345
|
|
0.99299472 0.98949809 0.99299472 0.98954691]
|
|
|
|
mean value: 0.9944036729473622
|
|
|
|
key: test_accuracy
|
|
value: [0.921875 0.9375 0.890625 0.890625 0.93650794 0.88888889
|
|
0.87301587 0.88888889 0.85714286 0.96825397]
|
|
|
|
mean value: 0.9053323412698413
|
|
|
|
key: train_accuracy
|
|
value: [0.99649123 1. 1. 0.99824561 0.99649737 0.99824869
|
|
0.99649737 0.99474606 0.99649737 0.99474606]
|
|
|
|
mean value: 0.9971969766798783
|
|
|
|
key: test_fscore
|
|
value: [0.92307692 0.9375 0.89230769 0.88888889 0.93548387 0.89552239
|
|
0.875 0.88888889 0.86567164 0.96774194]
|
|
|
|
mean value: 0.9070082229464752
|
|
|
|
key: train_fscore
|
|
value: [0.99649123 1. 1. 0.99824253 0.99647887 0.99824253
|
|
0.99649123 0.9947644 0.9965035 0.99472759]
|
|
|
|
mean value: 0.9971941877567602
|
|
|
|
key: test_precision
|
|
value: [0.90909091 0.9375 0.87878788 0.90322581 0.96666667 0.85714286
|
|
0.875 0.875 0.80555556 0.96774194]
|
|
|
|
mean value: 0.8975711609179351
|
|
|
|
key: train_precision
|
|
value: [0.99649123 1. 1. 1. 1. 1.
|
|
0.99649123 0.99303136 0.9965035 1. ]
|
|
|
|
mean value: 0.9982517311528865
|
|
|
|
key: test_recall
|
|
value: [0.9375 0.9375 0.90625 0.875 0.90625 0.9375
|
|
0.875 0.90322581 0.93548387 0.96774194]
|
|
|
|
mean value: 0.9181451612903225
|
|
|
|
key: train_recall
|
|
value: [0.99649123 1. 1. 0.99649123 0.99298246 0.99649123
|
|
0.99649123 0.9965035 0.9965035 0.98951049]
|
|
|
|
mean value: 0.9961464850938535
|
|
|
|
key: test_roc_auc
|
|
value: [0.921875 0.9375 0.890625 0.890625 0.93699597 0.88810484
|
|
0.87298387 0.8891129 0.85836694 0.96824597]
|
|
|
|
mean value: 0.9054435483870967
|
|
|
|
key: train_roc_auc
|
|
value: [0.99649123 1. 1. 0.99824561 0.99649123 0.99824561
|
|
0.99649736 0.99474298 0.99649736 0.99475524]
|
|
|
|
mean value: 0.9971966629861366
|
|
|
|
key: test_jcc
|
|
value: [0.85714286 0.88235294 0.80555556 0.8 0.87878788 0.81081081
|
|
0.77777778 0.8 0.76315789 0.9375 ]
|
|
|
|
mean value: 0.8313085715988193
|
|
|
|
key: train_jcc
|
|
value: [0.99300699 1. 1. 0.99649123 0.99298246 0.99649123
|
|
0.99300699 0.98958333 0.99303136 0.98951049]
|
|
|
|
mean value: 0.9944104080023528
|
|
|
|
MCC on Blind test: 0.71
|
|
|
|
Accuracy on Blind test: 0.86
|
|
|
|
Model_name: QDA
|
|
Model func: QuadraticDiscriminantAnalysis()
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', QuadraticDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.0329814 0.03775144 0.04330111 0.03259683 0.03292823 0.03234887
|
|
0.03247094 0.03311419 0.0330894 0.03246641]
|
|
|
|
mean value: 0.03430488109588623
|
|
|
|
key: score_time
|
|
value: [0.01277876 0.01312947 0.01511431 0.01498032 0.01761007 0.01501441
|
|
0.01503301 0.01509452 0.01503158 0.01518631]
|
|
|
|
mean value: 0.0148972749710083
|
|
|
|
key: test_mcc
|
|
value: [0.17213259 0.32163376 0.28347335 0.22473329 0.11331178 0.10141277
|
|
0.4672925 0.1715272 0.31933319 0.29699435]
|
|
|
|
mean value: 0.24718447769250632
|
|
|
|
key: train_mcc
|
|
value: [0.34935261 0.33007486 0.3365728 0.33333333 0.3515425 0.35467079
|
|
0.31597841 0.35903931 0.33033226 0.38353707]
|
|
|
|
mean value: 0.3444433939887435
|
|
|
|
key: test_accuracy
|
|
value: [0.5625 0.59375 0.59375 0.578125 0.53968254 0.53968254
|
|
0.68253968 0.53968254 0.58730159 0.61904762]
|
|
|
|
mean value: 0.5836061507936507
|
|
|
|
key: train_accuracy
|
|
value: [0.60877193 0.59824561 0.60175439 0.6 0.60945709 0.61120841
|
|
0.59019264 0.61471103 0.59894921 0.62872154]
|
|
|
|
mean value: 0.6062011859772022
|
|
|
|
key: test_fscore
|
|
value: [0.6744186 0.71111111 0.70454545 0.68965517 0.6741573 0.66666667
|
|
0.76190476 0.6741573 0.70454545 0.7 ]
|
|
|
|
mean value: 0.6961161832579978
|
|
|
|
key: train_fscore
|
|
value: [0.71878941 0.71339174 0.71518193 0.71428571 0.71878941 0.71969697
|
|
0.70895522 0.72222222 0.71410737 0.72959184]
|
|
|
|
mean value: 0.7175011819161466
|
|
|
|
key: test_precision
|
|
value: [0.53703704 0.55172414 0.55357143 0.54545455 0.52631579 0.52727273
|
|
0.61538462 0.51724138 0.54385965 0.57142857]
|
|
|
|
mean value: 0.5489289880986796
|
|
|
|
key: train_precision
|
|
value: [0.56102362 0.55447471 0.55664062 0.55555556 0.56102362 0.56213018
|
|
0.54913295 0.56521739 0.55533981 0.57429719]
|
|
|
|
mean value: 0.5594835644197532
|
|
|
|
key: test_recall
|
|
value: [0.90625 1. 0.96875 0.9375 0.9375 0.90625
|
|
1. 0.96774194 1. 0.90322581]
|
|
|
|
mean value: 0.9527217741935484
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.5625 0.59375 0.59375 0.578125 0.53326613 0.53377016
|
|
0.67741935 0.54637097 0.59375 0.6234879 ]
|
|
|
|
mean value: 0.5836189516129032
|
|
|
|
key: train_roc_auc
|
|
value: [0.60877193 0.59824561 0.60175439 0.6 0.61013986 0.61188811
|
|
0.59090909 0.61403509 0.59824561 0.62807018]
|
|
|
|
mean value: 0.6062059869954607
|
|
|
|
key: test_jcc
|
|
value: [0.50877193 0.55172414 0.54385965 0.52631579 0.50847458 0.5
|
|
0.61538462 0.50847458 0.54385965 0.53846154]
|
|
|
|
mean value: 0.5345326461863421
|
|
|
|
key: train_jcc
|
|
value: [0.56102362 0.55447471 0.55664062 0.55555556 0.56102362 0.56213018
|
|
0.54913295 0.56521739 0.55533981 0.57429719]
|
|
|
|
mean value: 0.5594835644197532
|
|
|
|
MCC on Blind test: 0.04
|
|
|
|
Accuracy on Blind test: 0.46
|
|
|
|
Model_name: Ridge Classifier
|
|
Model func: RidgeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.03066587 0.03988957 0.05028677 0.04420638 0.04806757 0.04310489
|
|
0.03911352 0.03920937 0.03922009 0.03915834]
|
|
|
|
mean value: 0.041292238235473636
|
|
|
|
key: score_time
|
|
value: [0.01892376 0.01950431 0.01943779 0.01930618 0.01957011 0.01895785
|
|
0.01866388 0.01875091 0.01871824 0.01869988]
|
|
|
|
mean value: 0.0190532922744751
|
|
|
|
key: test_mcc
|
|
value: [0.65657067 0.6644106 0.59404013 0.71910121 0.62939541 0.61982085
|
|
0.72270545 0.53874599 0.64134943 0.62939541]
|
|
|
|
mean value: 0.6415535145585206
|
|
|
|
key: train_mcc
|
|
value: [0.78479784 0.7840214 0.77698982 0.7922879 0.78479063 0.79883396
|
|
0.78109075 0.77367788 0.77401834 0.80014056]
|
|
|
|
mean value: 0.7850649085161974
|
|
|
|
key: test_accuracy
|
|
value: [0.828125 0.828125 0.796875 0.859375 0.80952381 0.80952381
|
|
0.85714286 0.76190476 0.80952381 0.80952381]
|
|
|
|
mean value: 0.8169642857142857
|
|
|
|
key: train_accuracy
|
|
value: [0.89122807 0.89122807 0.8877193 0.89473684 0.89141856 0.89842382
|
|
0.88966725 0.88616462 0.88616462 0.89842382]
|
|
|
|
mean value: 0.8915174977724521
|
|
|
|
key: test_fscore
|
|
value: [0.83076923 0.84057971 0.8 0.85714286 0.82857143 0.81818182
|
|
0.86956522 0.7826087 0.82857143 0.78571429]
|
|
|
|
mean value: 0.8241704672139455
|
|
|
|
key: train_fscore
|
|
value: [0.89527027 0.89455782 0.89115646 0.8989899 0.89491525 0.90169492
|
|
0.89303905 0.88964346 0.89001692 0.90301003]
|
|
|
|
mean value: 0.8952294091118016
|
|
|
|
key: test_precision
|
|
value: [0.81818182 0.78378378 0.78787879 0.87096774 0.76315789 0.79411765
|
|
0.81081081 0.71052632 0.74358974 0.88 ]
|
|
|
|
mean value: 0.7963014543765567
|
|
|
|
key: train_precision
|
|
value: [0.86319218 0.8679868 0.86468647 0.86407767 0.86557377 0.87213115
|
|
0.86513158 0.86468647 0.86229508 0.86538462]
|
|
|
|
mean value: 0.8655145782618917
|
|
|
|
key: test_recall
|
|
value: [0.84375 0.90625 0.8125 0.84375 0.90625 0.84375
|
|
0.9375 0.87096774 0.93548387 0.70967742]
|
|
|
|
mean value: 0.8609879032258064
|
|
|
|
key: train_recall
|
|
value: [0.92982456 0.92280702 0.91929825 0.93684211 0.92631579 0.93333333
|
|
0.92280702 0.91608392 0.91958042 0.94405594]
|
|
|
|
mean value: 0.9270948349895718
|
|
|
|
key: test_roc_auc
|
|
value: [0.828125 0.828125 0.796875 0.859375 0.80796371 0.80897177
|
|
0.85584677 0.76360887 0.81149194 0.80796371]
|
|
|
|
mean value: 0.8168346774193548
|
|
|
|
key: train_roc_auc
|
|
value: [0.89122807 0.89122807 0.8877193 0.89473684 0.89147957 0.89848485
|
|
0.88972519 0.88611213 0.886106 0.89834376]
|
|
|
|
mean value: 0.8915163783584836
|
|
|
|
key: test_jcc
|
|
value: [0.71052632 0.725 0.66666667 0.75 0.70731707 0.69230769
|
|
0.76923077 0.64285714 0.70731707 0.64705882]
|
|
|
|
mean value: 0.701828155672262
|
|
|
|
key: train_jcc
|
|
value: [0.81039755 0.80923077 0.80368098 0.81651376 0.80981595 0.82098765
|
|
0.80674847 0.80122324 0.80182927 0.82317073]
|
|
|
|
mean value: 0.8103598378899687
|
|
|
|
MCC on Blind test: 0.47
|
|
|
|
Accuracy on Blind test: 0.74
|
|
|
|
Model_name: Ridge ClassifierCV
|
|
Model func: RidgeClassifierCV(cv=10)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifierCV(cv=10))])
|
|
|
|
key: fit_time
|
|
value: [0.18102288 0.29600692 0.18413353 0.21506524 0.22669625 0.33359575
|
|
0.33408237 0.31031942 0.31877875 0.30942845]
|
|
|
|
mean value: 0.2709129571914673
|
|
|
|
key: score_time
|
|
value: [0.01897454 0.01883221 0.01218867 0.01218319 0.01881433 0.01916432
|
|
0.01888371 0.01883912 0.01884961 0.01881719]
|
|
|
|
mean value: 0.017554688453674316
|
|
|
|
key: test_mcc
|
|
value: [0.65657067 0.6644106 0.56360186 0.790965 0.68740835 0.61982085
|
|
0.72270545 0.53874599 0.64134943 0.62939541]
|
|
|
|
mean value: 0.6514973606605419
|
|
|
|
key: train_mcc
|
|
value: [0.78479784 0.7840214 0.80845708 0.80881692 0.83990276 0.79883396
|
|
0.78109075 0.77367788 0.77401834 0.80014056]
|
|
|
|
mean value: 0.7953757483531136
|
|
|
|
key: test_accuracy
|
|
value: [0.828125 0.828125 0.78125 0.890625 0.84126984 0.80952381
|
|
0.85714286 0.76190476 0.80952381 0.80952381]
|
|
|
|
mean value: 0.8217013888888889
|
|
|
|
key: train_accuracy
|
|
value: [0.89122807 0.89122807 0.90350877 0.90350877 0.91943958 0.89842382
|
|
0.88966725 0.88616462 0.88616462 0.89842382]
|
|
|
|
mean value: 0.8967757396995115
|
|
|
|
key: test_fscore
|
|
value: [0.83076923 0.84057971 0.77419355 0.88135593 0.85294118 0.81818182
|
|
0.86956522 0.7826087 0.82857143 0.78571429]
|
|
|
|
mean value: 0.8264481043486244
|
|
|
|
key: train_fscore
|
|
value: /home/tanu/git/LSHTM_analysis/scripts/ml/./katg_cd_7030.py:136: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
smnc_CT.sort_values(by = ['test_mcc'], ascending = False, inplace = True)
|
|
/home/tanu/git/LSHTM_analysis/scripts/ml/./katg_cd_7030.py:139: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
smnc_BT.sort_values(by = ['bts_mcc'], ascending = False, inplace = True)
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[0.89527027 0.89455782 0.90630324 0.90662139 0.92123288 0.90169492
|
|
0.89303905 0.88964346 0.89001692 0.90301003]
|
|
|
|
mean value: 0.9001389981005551
|
|
|
|
key: test_precision
|
|
value: [0.81818182 0.78378378 0.8 0.96296296 0.80555556 0.79411765
|
|
0.81081081 0.71052632 0.74358974 0.88 ]
|
|
|
|
mean value: 0.8109528637732972
|
|
|
|
key: train_precision
|
|
value: [0.86319218 0.8679868 0.8807947 0.87828947 0.89966555 0.87213115
|
|
0.86513158 0.86468647 0.86229508 0.86538462]
|
|
|
|
mean value: 0.8719557601087767
|
|
|
|
key: test_recall
|
|
value: [0.84375 0.90625 0.75 0.8125 0.90625 0.84375
|
|
0.9375 0.87096774 0.93548387 0.70967742]
|
|
|
|
mean value: 0.8516129032258064
|
|
|
|
key: train_recall
|
|
value: [0.92982456 0.92280702 0.93333333 0.93684211 0.94385965 0.93333333
|
|
0.92280702 0.91608392 0.91958042 0.94405594]
|
|
|
|
mean value: 0.9302527297264139
|
|
|
|
key: test_roc_auc
|
|
value: [0.828125 0.828125 0.78125 0.890625 0.84022177 0.80897177
|
|
0.85584677 0.76360887 0.81149194 0.80796371]
|
|
|
|
mean value: 0.8216229838709678
|
|
|
|
key: train_roc_auc
|
|
value: [0.89122807 0.89122807 0.90350877 0.90350877 0.91948227 0.89848485
|
|
0.88972519 0.88611213 0.886106 0.89834376]
|
|
|
|
mean value: 0.8967727886148938
|
|
|
|
key: test_jcc
|
|
value: [0.71052632 0.725 0.63157895 0.78787879 0.74358974 0.69230769
|
|
0.76923077 0.64285714 0.70731707 0.64705882]
|
|
|
|
mean value: 0.7057345295722174
|
|
|
|
key: train_jcc
|
|
value: [0.81039755 0.80923077 0.82866044 0.82919255 0.85396825 0.82098765
|
|
0.80674847 0.80122324 0.80182927 0.82317073]
|
|
|
|
mean value: 0.8185408921605636
|
|
|
|
MCC on Blind test: 0.47
|
|
|
|
Accuracy on Blind test: 0.74
|
|
|
|
Model_name: Logistic Regression
|
|
Model func: LogisticRegression(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegression(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.03537369 0.03771615 0.03843212 0.03799272 0.0374248 0.0644691
|
|
0.06080461 0.05791664 0.03776813 0.04242468]
|
|
|
|
mean value: 0.04503226280212402
|
|
|
|
key: score_time
|
|
value: [0.01476192 0.01482749 0.01522851 0.01489902 0.0200181 0.02211308
|
|
0.01651382 0.02252531 0.01618242 0.01794791]
|
|
|
|
mean value: 0.01750175952911377
|
|
|
|
key: test_mcc
|
|
value: [0.56360186 0.62622429 0.46897905 0.75 0.50663549 0.65085805
|
|
0.65315611 0.5026181 0.56086231 0.65821474]
|
|
|
|
mean value: 0.5941149998637926
|
|
|
|
key: train_mcc
|
|
value: [0.72381022 0.71653529 0.69261083 0.71314164 0.72366652 0.7206799
|
|
0.72384769 0.73407475 0.73442906 0.70349326]
|
|
|
|
mean value: 0.7186289159760348
|
|
|
|
key: test_accuracy
|
|
value: [0.78125 0.8125 0.734375 0.875 0.74603175 0.82539683
|
|
0.82539683 0.74603175 0.77777778 0.82539683]
|
|
|
|
mean value: 0.7949156746031746
|
|
|
|
key: train_accuracy
|
|
value: [0.86140351 0.85789474 0.84561404 0.85614035 0.86164623 0.85989492
|
|
0.86164623 0.86690018 0.86690018 0.85113835]
|
|
|
|
mean value: 0.8589178726149875
|
|
|
|
key: test_fscore
|
|
value: [0.77419355 0.81818182 0.73015873 0.875 0.77777778 0.83076923
|
|
0.8358209 0.76470588 0.78787879 0.80701754]
|
|
|
|
mean value: 0.800150421488842
|
|
|
|
key: train_fscore
|
|
value: [0.86495726 0.86106346 0.85034014 0.85958904 0.86355786 0.8630137
|
|
0.86402754 0.86896552 0.86986301 0.85568761]
|
|
|
|
mean value: 0.8621065139729672
|
|
|
|
key: test_precision
|
|
value: [0.8 0.79411765 0.74193548 0.875 0.7 0.81818182
|
|
0.8 0.7027027 0.74285714 0.88461538]
|
|
|
|
mean value: 0.785941017928684
|
|
|
|
key: train_precision
|
|
value: [0.84333333 0.84228188 0.82508251 0.83946488 0.85034014 0.84280936
|
|
0.84797297 0.85714286 0.85234899 0.83168317]
|
|
|
|
mean value: 0.8432460096046102
|
|
|
|
key: test_recall
|
|
value: [0.75 0.84375 0.71875 0.875 0.875 0.84375
|
|
0.875 0.83870968 0.83870968 0.74193548]
|
|
|
|
mean value: 0.8200604838709677
|
|
|
|
key: train_recall
|
|
value: [0.8877193 0.88070175 0.87719298 0.88070175 0.87719298 0.88421053
|
|
0.88070175 0.88111888 0.88811189 0.88111888]
|
|
|
|
mean value: 0.8818770702981229
|
|
|
|
key: test_roc_auc
|
|
value: [0.78125 0.8125 0.734375 0.875 0.74395161 0.82510081
|
|
0.82459677 0.74747984 0.77872984 0.82409274]
|
|
|
|
mean value: 0.7947076612903226
|
|
|
|
key: train_roc_auc
|
|
value: [0.86140351 0.85789474 0.84561404 0.85614035 0.86167341 0.85993743
|
|
0.86167955 0.86687523 0.86686296 0.85108576]
|
|
|
|
mean value: 0.85891669733775
|
|
|
|
key: test_jcc
|
|
value: [0.63157895 0.69230769 0.575 0.77777778 0.63636364 0.71052632
|
|
0.71794872 0.61904762 0.65 0.67647059]
|
|
|
|
mean value: 0.6687021294838632
|
|
|
|
key: train_jcc
|
|
value: [0.76204819 0.7560241 0.73964497 0.75375375 0.75987842 0.75903614
|
|
0.76060606 0.76829268 0.76969697 0.74777448]
|
|
|
|
mean value: 0.7576755771297808
|
|
|
|
MCC on Blind test: 0.45
|
|
|
|
Accuracy on Blind test: 0.73
|
|
|
|
Model_name: Logistic RegressionCV
|
|
Model func: LogisticRegressionCV(random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegressionCV(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.83497286 0.88039732 0.9799037 0.91570306 1.0464592 0.90148354
|
|
1.06532311 0.94581509 1.02171636 0.94928885]
|
|
|
|
mean value: 0.9541063070297241
|
|
|
|
key: score_time
|
|
value: [0.01470733 0.01652384 0.01522231 0.01531196 0.0153327 0.01528358
|
|
0.01523829 0.01887465 0.02170634 0.0123136 ]
|
|
|
|
mean value: 0.016051459312438964
|
|
|
|
key: test_mcc
|
|
value: [0.62622429 0.790965 0.6011334 0.75592895 0.67763983 0.65315611
|
|
0.72270545 0.71790017 0.59049817 0.74722285]
|
|
|
|
mean value: 0.6883374204864343
|
|
|
|
key: train_mcc
|
|
value: [0.87377027 0.88425952 0.84975598 0.87745769 0.86709616 0.82520071
|
|
0.87767677 0.86742393 0.89511471 0.88484639]
|
|
|
|
mean value: 0.8702602118509842
|
|
|
|
key: test_accuracy
|
|
value: [0.8125 0.890625 0.796875 0.875 0.82539683 0.82539683
|
|
0.85714286 0.85714286 0.79365079 0.87301587]
|
|
|
|
mean value: 0.8406746031746032
|
|
|
|
key: train_accuracy
|
|
value: [0.93684211 0.94210526 0.9245614 0.93859649 0.93345009 0.91243433
|
|
0.93870403 0.93345009 0.9474606 0.94220665]
|
|
|
|
mean value: 0.9349811042492395
|
|
|
|
key: test_fscore
|
|
value: [0.80645161 0.89855072 0.77966102 0.86666667 0.84931507 0.8358209
|
|
0.86956522 0.86153846 0.8 0.86666667]
|
|
|
|
mean value: 0.8434236330768697
|
|
|
|
key: train_fscore
|
|
value: [0.93728223 0.94240838 0.92598967 0.93934142 0.93402778 0.91349481
|
|
0.93934142 0.9347079 0.94809689 0.94320138]
|
|
|
|
mean value: 0.9357891876189721
|
|
|
|
key: test_precision
|
|
value: [0.83333333 0.83783784 0.85185185 0.92857143 0.75609756 0.8
|
|
0.81081081 0.82352941 0.76470588 0.89655172]
|
|
|
|
mean value: 0.830328984163645
|
|
|
|
key: train_precision
|
|
value: [0.93079585 0.9375 0.90878378 0.92808219 0.92439863 0.90102389
|
|
0.92808219 0.91891892 0.93835616 0.92881356]
|
|
|
|
mean value: 0.9244755173935344
|
|
|
|
key: test_recall
|
|
value: [0.78125 0.96875 0.71875 0.8125 0.96875 0.875
|
|
0.9375 0.90322581 0.83870968 0.83870968]
|
|
|
|
mean value: 0.8643145161290322
|
|
|
|
key: train_recall
|
|
value: [0.94385965 0.94736842 0.94385965 0.95087719 0.94385965 0.92631579
|
|
0.95087719 0.95104895 0.95804196 0.95804196]
|
|
|
|
mean value: 0.9474150410992517
|
|
|
|
key: test_roc_auc
|
|
value: [0.8125 0.890625 0.796875 0.875 0.82308468 0.82459677
|
|
0.85584677 0.8578629 0.79435484 0.87247984]
|
|
|
|
mean value: 0.8403225806451613
|
|
|
|
key: train_roc_auc
|
|
value: [0.93684211 0.94210526 0.9245614 0.93859649 0.93346829 0.91245859
|
|
0.93872531 0.93341921 0.94744203 0.94217887]
|
|
|
|
mean value: 0.9349797570850202
|
|
|
|
key: test_jcc
|
|
value: [0.67567568 0.81578947 0.63888889 0.76470588 0.73809524 0.71794872
|
|
0.76923077 0.75675676 0.66666667 0.76470588]
|
|
|
|
mean value: 0.7308463951652806
|
|
|
|
key: train_jcc
|
|
value: [0.88196721 0.89108911 0.86217949 0.88562092 0.8762215 0.84076433
|
|
0.88562092 0.87741935 0.90131579 0.89250814]
|
|
|
|
mean value: 0.8794706756486887
|
|
|
|
MCC on Blind test: 0.43
|
|
|
|
Accuracy on Blind test: 0.72
|
|
|
|
Model_name: Gaussian NB
|
|
Model func: GaussianNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01466441 0.01140618 0.0103066 0.01056147 0.01054859 0.01025057
|
|
0.01027822 0.01057982 0.01022387 0.01138043]
|
|
|
|
mean value: 0.011020016670227051
|
|
|
|
key: score_time
|
|
value: [0.01233292 0.00939727 0.00900912 0.00904417 0.0090394 0.00901127
|
|
0.00909233 0.00893521 0.00901937 0.00898433]
|
|
|
|
mean value: 0.009386539459228516
|
|
|
|
key: test_mcc
|
|
value: [0.40804713 0.59404013 0.50395263 0.65657067 0.4647426 0.36661779
|
|
0.46743768 0.36629686 0.33569416 0.37474278]
|
|
|
|
mean value: 0.4538142428991961
|
|
|
|
key: train_mcc
|
|
value: [0.50180381 0.47378922 0.47754881 0.46667816 0.49609564 0.48166961
|
|
0.43632328 0.49918401 0.516636 0.51038486]
|
|
|
|
mean value: 0.4860113400738339
|
|
|
|
key: test_accuracy
|
|
value: [0.703125 0.796875 0.75 0.828125 0.73015873 0.68253968
|
|
0.73015873 0.68253968 0.66666667 0.68253968]
|
|
|
|
mean value: 0.7252728174603175
|
|
|
|
key: train_accuracy
|
|
value: [0.75087719 0.73684211 0.73859649 0.73333333 0.74781086 0.7408056
|
|
0.71103327 0.74956217 0.75831874 0.75481611]
|
|
|
|
mean value: 0.742199588287707
|
|
|
|
key: test_fscore
|
|
value: [0.71641791 0.8 0.73333333 0.82539683 0.75362319 0.70588235
|
|
0.71186441 0.6875 0.67692308 0.62962963]
|
|
|
|
mean value: 0.7240570723857261
|
|
|
|
key: train_fscore
|
|
value: [0.75261324 0.73958333 0.74354561 0.73426573 0.75257732 0.74216028
|
|
0.66800805 0.75216638 0.75874126 0.76190476]
|
|
|
|
mean value: 0.7405565964118
|
|
|
|
key: test_precision
|
|
value: [0.68571429 0.78787879 0.78571429 0.83870968 0.7027027 0.66666667
|
|
0.77777778 0.66666667 0.64705882 0.73913043]
|
|
|
|
mean value: 0.7298020108852549
|
|
|
|
key: train_precision
|
|
value: [0.74740484 0.73195876 0.72972973 0.73170732 0.73737374 0.73702422
|
|
0.78301887 0.74570447 0.75874126 0.74172185]
|
|
|
|
mean value: 0.7444385061131555
|
|
|
|
key: test_recall
|
|
value: [0.75 0.8125 0.6875 0.8125 0.8125 0.75
|
|
0.65625 0.70967742 0.70967742 0.5483871 ]
|
|
|
|
mean value: 0.724899193548387
|
|
|
|
key: train_recall
|
|
value: [0.75789474 0.74736842 0.75789474 0.73684211 0.76842105 0.74736842
|
|
0.58245614 0.75874126 0.75874126 0.78321678]
|
|
|
|
mean value: 0.7398944914734389
|
|
|
|
key: test_roc_auc
|
|
value: [0.703125 0.796875 0.75 0.828125 0.72883065 0.68145161
|
|
0.73135081 0.68296371 0.66733871 0.68044355]
|
|
|
|
mean value: 0.7250504032258065
|
|
|
|
key: train_roc_auc
|
|
value: [0.75087719 0.73684211 0.73859649 0.73333333 0.74784689 0.74081708
|
|
0.71080849 0.74954607 0.758318 0.75476629]
|
|
|
|
mean value: 0.7421751932278249
|
|
|
|
key: test_jcc
|
|
value: [0.55813953 0.66666667 0.57894737 0.7027027 0.60465116 0.54545455
|
|
0.55263158 0.52380952 0.51162791 0.45945946]
|
|
|
|
mean value: 0.5704090450112482
|
|
|
|
key: train_jcc
|
|
value: [0.60335196 0.58677686 0.59178082 0.5801105 0.60330579 0.5900277
|
|
0.50151057 0.60277778 0.61126761 0.61538462]
|
|
|
|
mean value: 0.5886294192736087
|
|
|
|
MCC on Blind test: 0.42
|
|
|
|
Accuracy on Blind test: 0.71
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01070476 0.01045203 0.01045537 0.01056457 0.01051593 0.01061487
|
|
0.01065612 0.01072598 0.01050091 0.01057959]
|
|
|
|
mean value: 0.010577011108398437
|
|
|
|
key: score_time
|
|
value: [0.00903749 0.00892234 0.00901079 0.00903344 0.00917768 0.00899863
|
|
0.00909567 0.00893569 0.0089848 0.00909686]
|
|
|
|
mean value: 0.009029340744018555
|
|
|
|
key: test_mcc
|
|
value: [0.53150959 0.50395263 0.21971769 0.44539933 0.49493401 0.52419355
|
|
0.46146899 0.37363667 0.33366935 0.58770161]
|
|
|
|
mean value: 0.4476183430779835
|
|
|
|
key: train_mcc
|
|
value: [0.55284089 0.54886043 0.51932702 0.52686418 0.54489338 0.48908468
|
|
0.55937838 0.5535563 0.52463264 0.52543149]
|
|
|
|
mean value: 0.5344869395135066
|
|
|
|
key: test_accuracy
|
|
value: [0.765625 0.75 0.609375 0.71875 0.74603175 0.76190476
|
|
0.73015873 0.68253968 0.66666667 0.79365079]
|
|
|
|
mean value: 0.7224702380952381
|
|
|
|
key: train_accuracy
|
|
value: [0.7754386 0.77368421 0.75964912 0.76315789 0.77232925 0.74430823
|
|
0.7793345 0.77583187 0.76182137 0.76182137]
|
|
|
|
mean value: 0.7667376409500107
|
|
|
|
key: test_fscore
|
|
value: [0.76190476 0.76470588 0.59016393 0.74285714 0.76470588 0.76190476
|
|
0.74626866 0.70588235 0.66666667 0.79365079]
|
|
|
|
mean value: 0.7298710835773833
|
|
|
|
key: train_fscore
|
|
value: [0.78451178 0.78172589 0.7609075 0.76843911 0.77508651 0.74914089
|
|
0.78424658 0.7852349 0.76949153 0.77181208]
|
|
|
|
mean value: 0.7730596764554477
|
|
|
|
key: test_precision
|
|
value: [0.77419355 0.72222222 0.62068966 0.68421053 0.72222222 0.77419355
|
|
0.71428571 0.64864865 0.65625 0.78125 ]
|
|
|
|
mean value: 0.7098166085641204
|
|
|
|
key: train_precision
|
|
value: [0.75404531 0.75490196 0.75694444 0.75167785 0.76450512 0.73400673
|
|
0.76588629 0.75483871 0.74671053 0.74193548]
|
|
|
|
mean value: 0.7525452425971371
|
|
|
|
key: test_recall
|
|
value: [0.75 0.8125 0.5625 0.8125 0.8125 0.75
|
|
0.78125 0.77419355 0.67741935 0.80645161]
|
|
|
|
mean value: 0.7539314516129032
|
|
|
|
key: train_recall
|
|
value: [0.81754386 0.81052632 0.76491228 0.78596491 0.78596491 0.76491228
|
|
0.80350877 0.81818182 0.79370629 0.8041958 ]
|
|
|
|
mean value: 0.794941724941725
|
|
|
|
key: test_roc_auc
|
|
value: [0.765625 0.75 0.609375 0.71875 0.74495968 0.76209677
|
|
0.72933468 0.68397177 0.66683468 0.79385081]
|
|
|
|
mean value: 0.7224798387096774
|
|
|
|
key: train_roc_auc
|
|
value: [0.7754386 0.77368421 0.75964912 0.76315789 0.77235309 0.74434425
|
|
0.77937676 0.77575758 0.76176543 0.76174702]
|
|
|
|
mean value: 0.766727395411606
|
|
|
|
key: test_jcc
|
|
value: [0.61538462 0.61904762 0.41860465 0.59090909 0.61904762 0.61538462
|
|
0.5952381 0.54545455 0.5 0.65789474]
|
|
|
|
mean value: 0.5776965588471097
|
|
|
|
key: train_jcc
|
|
value: [0.64542936 0.64166667 0.61408451 0.62395543 0.63276836 0.5989011
|
|
0.64507042 0.64640884 0.62534435 0.6284153 ]
|
|
|
|
mean value: 0.6302044344305446
|
|
|
|
MCC on Blind test: 0.36
|
|
|
|
Accuracy on Blind test: 0.69
|
|
|
|
Model_name: K-Nearest Neighbors
|
|
Model func: KNeighborsClassifier()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', KNeighborsClassifier())])
|
|
|
|
key: fit_time
|
|
value: [0.00986624 0.01092362 0.01090121 0.00989747 0.00991488 0.01093745
|
|
0.01010799 0.01105952 0.01068306 0.00987339]
|
|
|
|
mean value: 0.010416483879089356
|
|
|
|
key: score_time
|
|
value: [0.01277661 0.01334596 0.01341963 0.01303458 0.01297259 0.0130651
|
|
0.01314664 0.01294398 0.0128777 0.01310587]
|
|
|
|
mean value: 0.013068866729736329
|
|
|
|
key: test_mcc
|
|
value: [0.44539933 0.375 0.15694121 0.40804713 0.20585278 0.39757328
|
|
0.39656932 0.18817455 0.32137677 0.33569416]
|
|
|
|
mean value: 0.32306285455062617
|
|
|
|
key: train_mcc
|
|
value: [0.60515381 0.5803782 0.59629523 0.62879079 0.60786984 0.61994057
|
|
0.62361994 0.58865786 0.59821633 0.59511948]
|
|
|
|
mean value: 0.6044042041011564
|
|
|
|
key: test_accuracy
|
|
value: [0.71875 0.6875 0.578125 0.703125 0.6031746 0.6984127
|
|
0.6984127 0.58730159 0.65079365 0.66666667]
|
|
|
|
mean value: 0.6592261904761905
|
|
|
|
key: train_accuracy
|
|
value: [0.80175439 0.78947368 0.79649123 0.8122807 0.80210158 0.80910683
|
|
0.81085814 0.78984238 0.79859895 0.79509632]
|
|
|
|
mean value: 0.8005604203152364
|
|
|
|
key: test_fscore
|
|
value: [0.74285714 0.6875 0.55737705 0.71641791 0.61538462 0.71641791
|
|
0.70769231 0.63888889 0.69444444 0.67692308]
|
|
|
|
mean value: 0.6753903346266327
|
|
|
|
key: train_fscore
|
|
value: [0.80879865 0.79661017 0.80666667 0.8225539 0.81198003 0.81556684
|
|
0.81756757 0.80707395 0.80475382 0.80788177]
|
|
|
|
mean value: 0.8099453364834789
|
|
|
|
key: test_precision
|
|
value: [0.68421053 0.6875 0.5862069 0.68571429 0.60606061 0.68571429
|
|
0.6969697 0.56097561 0.6097561 0.64705882]
|
|
|
|
mean value: 0.6450166828172873
|
|
|
|
key: train_precision
|
|
value: [0.78104575 0.7704918 0.76825397 0.77987421 0.7721519 0.7875817
|
|
0.78827362 0.74702381 0.78217822 0.76160991]
|
|
|
|
mean value: 0.7738484885185218
|
|
|
|
key: test_recall
|
|
value: [0.8125 0.6875 0.53125 0.75 0.625 0.75
|
|
0.71875 0.74193548 0.80645161 0.70967742]
|
|
|
|
mean value: 0.7133064516129032
|
|
|
|
key: train_recall
|
|
value: [0.83859649 0.8245614 0.84912281 0.87017544 0.85614035 0.84561404
|
|
0.84912281 0.87762238 0.82867133 0.86013986]
|
|
|
|
mean value: 0.84997668997669
|
|
|
|
key: test_roc_auc
|
|
value: [0.71875 0.6875 0.578125 0.703125 0.60282258 0.69758065
|
|
0.69808468 0.58971774 0.65322581 0.66733871]
|
|
|
|
mean value: 0.6596270161290323
|
|
|
|
key: train_roc_auc
|
|
value: [0.80175439 0.78947368 0.79649123 0.8122807 0.80219605 0.80917065
|
|
0.81092504 0.78968838 0.79854619 0.79498221]
|
|
|
|
mean value: 0.8005508526561158
|
|
|
|
key: test_jcc
|
|
value: [0.59090909 0.52380952 0.38636364 0.55813953 0.44444444 0.55813953
|
|
0.54761905 0.46938776 0.53191489 0.51162791]
|
|
|
|
mean value: 0.5122355368608992
|
|
|
|
key: train_jcc
|
|
value: [0.67897727 0.66197183 0.67597765 0.69859155 0.68347339 0.68857143
|
|
0.69142857 0.67654987 0.67329545 0.67768595]
|
|
|
|
mean value: 0.6806522966183778
|
|
|
|
MCC on Blind test: 0.25
|
|
|
|
Accuracy on Blind test: 0.63
|
|
|
|
Model_name: SVM
|
|
Model func: SVC(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SVC(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02857804 0.02860475 0.02841306 0.02905703 0.0280447 0.02880526
|
|
0.02847433 0.0280602 0.02867246 0.02894044]
|
|
|
|
mean value: 0.028565025329589842
|
|
|
|
key: score_time
|
|
value: [0.01360965 0.01362467 0.01358318 0.01370263 0.01335478 0.01353049
|
|
0.0134654 0.01333475 0.0136447 0.01354122]
|
|
|
|
mean value: 0.01353914737701416
|
|
|
|
key: test_mcc
|
|
value: [0.62622429 0.53150959 0.50395263 0.62622429 0.47011536 0.62325024
|
|
0.55611985 0.43812738 0.5026181 0.68740835]
|
|
|
|
mean value: 0.5565550079906242
|
|
|
|
key: train_mcc
|
|
value: [0.70869167 0.67488591 0.67886662 0.69787564 0.68539141 0.67649884
|
|
0.70019913 0.71082339 0.69984368 0.69230073]
|
|
|
|
mean value: 0.6925377015905695
|
|
|
|
key: test_accuracy
|
|
value: [0.8125 0.765625 0.75 0.8125 0.73015873 0.80952381
|
|
0.77777778 0.71428571 0.74603175 0.84126984]
|
|
|
|
mean value: 0.775967261904762
|
|
|
|
key: train_accuracy
|
|
value: [0.85263158 0.83684211 0.83859649 0.84736842 0.84238179 0.83712785
|
|
0.84938704 0.85464098 0.84938704 0.8441331 ]
|
|
|
|
mean value: 0.8452496389836237
|
|
|
|
key: test_fscore
|
|
value: [0.81818182 0.76923077 0.73333333 0.81818182 0.76056338 0.82352941
|
|
0.78787879 0.73529412 0.76470588 0.82758621]
|
|
|
|
mean value: 0.7838485525749475
|
|
|
|
key: train_fscore
|
|
value: [0.85953177 0.84156729 0.8440678 0.85427136 0.84536082 0.84317032
|
|
0.8537415 0.85956007 0.8537415 0.85240464]
|
|
|
|
mean value: 0.8507417066756678
|
|
|
|
key: test_precision
|
|
value: [0.79411765 0.75757576 0.78571429 0.79411765 0.69230769 0.77777778
|
|
0.76470588 0.67567568 0.7027027 0.88888889]
|
|
|
|
mean value: 0.7633583957113369
|
|
|
|
key: train_precision
|
|
value: [0.82108626 0.81788079 0.81639344 0.81730769 0.82828283 0.81168831
|
|
0.82838284 0.83278689 0.83112583 0.81072555]
|
|
|
|
mean value: 0.8215660434979373
|
|
|
|
key: test_recall
|
|
value: [0.84375 0.78125 0.6875 0.84375 0.84375 0.875
|
|
0.8125 0.80645161 0.83870968 0.77419355]
|
|
|
|
mean value: 0.8106854838709677
|
|
|
|
key: train_recall
|
|
value: [0.90175439 0.86666667 0.87368421 0.89473684 0.86315789 0.87719298
|
|
0.88070175 0.88811189 0.87762238 0.8986014 ]
|
|
|
|
mean value: 0.882223040117777
|
|
|
|
key: test_roc_auc
|
|
value: [0.8125 0.765625 0.75 0.8125 0.72832661 0.80846774
|
|
0.77721774 0.71572581 0.74747984 0.84022177]
|
|
|
|
mean value: 0.7758064516129032
|
|
|
|
key: train_roc_auc
|
|
value: [0.85263158 0.83684211 0.83859649 0.84736842 0.84241811 0.83719789
|
|
0.84944179 0.85458226 0.8493375 0.84403754]
|
|
|
|
mean value: 0.8452453686664213
|
|
|
|
key: test_jcc
|
|
value: [0.69230769 0.625 0.57894737 0.69230769 0.61363636 0.7
|
|
0.65 0.58139535 0.61904762 0.70588235]
|
|
|
|
mean value: 0.6458524437498806
|
|
|
|
key: train_jcc
|
|
value: [0.75366569 0.72647059 0.73020528 0.74561404 0.73214286 0.72886297
|
|
0.74480712 0.7537092 0.74480712 0.74277457]
|
|
|
|
mean value: 0.7403059430579226
|
|
|
|
MCC on Blind test: 0.41
|
|
|
|
Accuracy on Blind test: 0.71
|
|
|
|
Model_name: MLP
|
|
Model func: MLPClassifier(max_iter=500, random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MLPClassifier(max_iter=500, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.91805243 2.02247119 1.97110653 1.8196218 2.15600729 2.25056601
|
|
2.00501966 2.01817799 1.94140291 2.08676553]
|
|
|
|
mean value: 2.0189191341400146
|
|
|
|
key: score_time
|
|
value: [0.01340175 0.0231154 0.01601505 0.01246262 0.0241096 0.02291393
|
|
0.01671243 0.01519299 0.01519251 0.01688886]
|
|
|
|
mean value: 0.017600512504577635
|
|
|
|
key: test_mcc
|
|
value: [0.50097943 0.75592895 0.65657067 0.71910121 0.57258185 0.61103872
|
|
0.55611985 0.68415777 0.66853948 0.65315611]
|
|
|
|
mean value: 0.6378174031294251
|
|
|
|
key: train_mcc
|
|
value: [0.96857012 0.95087719 0.95453287 0.95146307 0.98949822 0.96176174
|
|
0.96497362 0.94760416 0.96218292 0.96161729]
|
|
|
|
mean value: 0.9613081218627331
|
|
|
|
key: test_accuracy
|
|
value: [0.75 0.875 0.828125 0.859375 0.77777778 0.79365079
|
|
0.77777778 0.84126984 0.82539683 0.82539683]
|
|
|
|
mean value: 0.8153769841269841
|
|
|
|
key: train_accuracy
|
|
value: [0.98421053 0.9754386 0.97719298 0.9754386 0.99474606 0.98073555
|
|
0.98248687 0.9737303 0.98073555 0.98073555]
|
|
|
|
mean value: 0.9805450579162442
|
|
|
|
key: test_fscore
|
|
value: [0.75757576 0.88235294 0.82539683 0.86153846 0.80555556 0.82191781
|
|
0.78787879 0.84375 0.84057971 0.81355932]
|
|
|
|
mean value: 0.8240105169519862
|
|
|
|
key: train_fscore
|
|
value: [0.98434783 0.9754386 0.9773913 0.97586207 0.99474606 0.98093588
|
|
0.98245614 0.97400347 0.98113208 0.98093588]
|
|
|
|
mean value: 0.9807249287896543
|
|
|
|
key: test_precision
|
|
value: [0.73529412 0.83333333 0.83870968 0.84848485 0.725 0.73170732
|
|
0.76470588 0.81818182 0.76315789 0.85714286]
|
|
|
|
mean value: 0.7915717746372225
|
|
|
|
key: train_precision
|
|
value: [0.97586207 0.9754386 0.96896552 0.95932203 0.99300699 0.96917808
|
|
0.98245614 0.96563574 0.96296296 0.97250859]
|
|
|
|
mean value: 0.9725336725005951
|
|
|
|
key: test_recall
|
|
value: [0.78125 0.9375 0.8125 0.875 0.90625 0.9375
|
|
0.8125 0.87096774 0.93548387 0.77419355]
|
|
|
|
mean value: 0.8643145161290322
|
|
|
|
key: train_recall
|
|
value: [0.99298246 0.9754386 0.98596491 0.99298246 0.99649123 0.99298246
|
|
0.98245614 0.98251748 1. 0.98951049]
|
|
|
|
mean value: 0.9891326217642007
|
|
|
|
key: test_roc_auc
|
|
value: [0.75 0.875 0.828125 0.859375 0.77570565 0.79133065
|
|
0.77721774 0.84173387 0.82711694 0.82459677]
|
|
|
|
mean value: 0.8150201612903226
|
|
|
|
key: train_roc_auc
|
|
value: [0.98421053 0.9754386 0.97719298 0.9754386 0.99474911 0.98075696
|
|
0.98248681 0.97371488 0.98070175 0.98072016]
|
|
|
|
mean value: 0.9805410379094589
|
|
|
|
key: test_jcc
|
|
value: [0.6097561 0.78947368 0.7027027 0.75675676 0.6744186 0.69767442
|
|
0.65 0.72972973 0.725 0.68571429]
|
|
|
|
mean value: 0.7021226279930791
|
|
|
|
key: train_jcc
|
|
value: [0.96917808 0.95205479 0.95578231 0.95286195 0.98954704 0.96258503
|
|
0.96551724 0.94932432 0.96296296 0.96258503]
|
|
|
|
mean value: 0.9622398777520786
|
|
|
|
MCC on Blind test: 0.4
|
|
|
|
Accuracy on Blind test: 0.71
|
|
|
|
Model_name: Decision Tree
|
|
Model func: DecisionTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', DecisionTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.05209064 0.03377962 0.03262115 0.03452277 0.03428578 0.03552485
|
|
0.03424144 0.03228545 0.03130507 0.03644228]
|
|
|
|
mean value: 0.03570990562438965
|
|
|
|
key: score_time
|
|
value: [0.00966406 0.00889969 0.00893044 0.00901508 0.00900245 0.00888991
|
|
0.00902891 0.00895739 0.00901842 0.00905275]
|
|
|
|
mean value: 0.009045910835266114
|
|
|
|
key: test_mcc
|
|
value: [0.75592895 0.9375 0.75 0.78163175 0.93844649 0.68352185
|
|
0.68245968 0.76058095 0.62475802 0.78160117]
|
|
|
|
mean value: 0.7696428848610658
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.875 0.96875 0.875 0.890625 0.96825397 0.84126984
|
|
0.84126984 0.87301587 0.80952381 0.88888889]
|
|
|
|
mean value: 0.8831597222222223
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.86666667 0.96875 0.875 0.89230769 0.96774194 0.84848485
|
|
0.84375 0.88235294 0.81818182 0.89230769]
|
|
|
|
mean value: 0.8855543594609059
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.92857143 0.96875 0.875 0.87878788 1. 0.82352941
|
|
0.84375 0.81081081 0.77142857 0.85294118]
|
|
|
|
mean value: 0.8753569277833984
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.8125 0.96875 0.875 0.90625 0.9375 0.875
|
|
0.84375 0.96774194 0.87096774 0.93548387]
|
|
|
|
mean value: 0.8992943548387097
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.875 0.96875 0.875 0.890625 0.96875 0.84072581
|
|
0.84122984 0.87449597 0.81048387 0.88961694]
|
|
|
|
mean value: 0.8834677419354838
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.76470588 0.93939394 0.77777778 0.80555556 0.9375 0.73684211
|
|
0.72972973 0.78947368 0.69230769 0.80555556]
|
|
|
|
mean value: 0.7978841922146875
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.63
|
|
|
|
Accuracy on Blind test: 0.82
|
|
|
|
Model_name: Extra Trees
|
|
Model func: ExtraTreesClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreesClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.13985062 0.14144874 0.14082766 0.14119935 0.13991904 0.13978195
|
|
0.14032125 0.13998294 0.14152503 0.14098597]
|
|
|
|
mean value: 0.14058425426483154
|
|
|
|
key: score_time
|
|
value: [0.01828051 0.01824522 0.01849365 0.01862502 0.01824903 0.01838708
|
|
0.01830077 0.01825333 0.01820087 0.01816535]
|
|
|
|
mean value: 0.018320083618164062
|
|
|
|
key: test_mcc
|
|
value: [0.625 0.78163175 0.62622429 0.78163175 0.58728587 0.65821474
|
|
0.61982085 0.59049817 0.4969666 0.72270545]
|
|
|
|
mean value: 0.648997946446946
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.8125 0.890625 0.8125 0.890625 0.79365079 0.82539683
|
|
0.80952381 0.79365079 0.74603175 0.85714286]
|
|
|
|
mean value: 0.8231646825396826
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.8125 0.88888889 0.80645161 0.88888889 0.8 0.84057971
|
|
0.81818182 0.8 0.75757576 0.84210526]
|
|
|
|
mean value: 0.8255171939741401
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.8125 0.90322581 0.83333333 0.90322581 0.78787879 0.78378378
|
|
0.79411765 0.76470588 0.71428571 0.92307692]
|
|
|
|
mean value: 0.8220133684673533
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.8125 0.875 0.78125 0.875 0.8125 0.90625
|
|
0.84375 0.83870968 0.80645161 0.77419355]
|
|
|
|
mean value: 0.8325604838709677
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.8125 0.890625 0.8125 0.890625 0.79334677 0.82409274
|
|
0.80897177 0.79435484 0.74697581 0.85584677]
|
|
|
|
mean value: 0.822983870967742
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.68421053 0.8 0.67567568 0.8 0.66666667 0.725
|
|
0.69230769 0.66666667 0.6097561 0.72727273]
|
|
|
|
mean value: 0.7047556052466194
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.4
|
|
|
|
Accuracy on Blind test: 0.71
|
|
|
|
Model_name: Extra Tree
|
|
Model func: ExtraTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01070738 0.01049376 0.01071596 0.01058316 0.01123023 0.01092839
|
|
0.01083207 0.01064515 0.01091695 0.01072025]
|
|
|
|
mean value: 0.01077733039855957
|
|
|
|
key: score_time
|
|
value: [0.00892258 0.00890732 0.0089016 0.00887442 0.00891399 0.00899148
|
|
0.00892997 0.00894594 0.00897908 0.00902557]
|
|
|
|
mean value: 0.008939194679260253
|
|
|
|
key: test_mcc
|
|
value: [0.438357 0.78470603 0.474579 0.53150959 0.49193548 0.49193548
|
|
0.61982085 0.30914596 0.52419355 0.42986904]
|
|
|
|
mean value: 0.5096051977610179
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.71875 0.890625 0.734375 0.765625 0.74603175 0.74603175
|
|
0.80952381 0.65079365 0.76190476 0.71428571]
|
|
|
|
mean value: 0.7537946428571428
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.70967742 0.89552239 0.75362319 0.76923077 0.75 0.75
|
|
0.81818182 0.67647059 0.76190476 0.71875 ]
|
|
|
|
mean value: 0.760336093337298
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.73333333 0.85714286 0.7027027 0.75757576 0.75 0.75
|
|
0.79411765 0.62162162 0.75 0.6969697 ]
|
|
|
|
mean value: 0.7413463616404793
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.6875 0.9375 0.8125 0.78125 0.75 0.75
|
|
0.84375 0.74193548 0.77419355 0.74193548]
|
|
|
|
mean value: 0.7820564516129033
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.71875 0.890625 0.734375 0.765625 0.74596774 0.74596774
|
|
0.80897177 0.65221774 0.76209677 0.71471774]
|
|
|
|
mean value: 0.7539314516129032
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.55 0.81081081 0.60465116 0.625 0.6 0.6
|
|
0.69230769 0.51111111 0.61538462 0.56097561]
|
|
|
|
mean value: 0.6170241002161025
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.23
|
|
|
|
Accuracy on Blind test: 0.62
|
|
|
|
Model_name: Random Forest
|
|
Model func: RandomForestClassifier(n_estimators=1000, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(n_estimators=1000, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [2.07888103 2.10924435 2.08753395 2.09503651 2.09699631 2.11549616
|
|
2.16317201 2.11698031 2.08939385 2.10804057]
|
|
|
|
mean value: 2.10607750415802
|
|
|
|
key: score_time
|
|
value: [0.09666562 0.09476447 0.09688997 0.09551358 0.10052085 0.10555124
|
|
0.09454823 0.10372686 0.09959555 0.09707618]
|
|
|
|
mean value: 0.09848525524139404
|
|
|
|
key: test_mcc
|
|
value: [0.8125 0.96922337 0.8125 0.84416229 0.78094752 0.75156646
|
|
0.58770161 0.87487431 0.68865372 0.87462485]
|
|
|
|
mean value: 0.7996754145177033
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.90625 0.984375 0.90625 0.921875 0.88888889 0.87301587
|
|
0.79365079 0.93650794 0.84126984 0.93650794]
|
|
|
|
mean value: 0.898859126984127
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.90625 0.98461538 0.90625 0.92307692 0.89552239 0.88235294
|
|
0.79365079 0.9375 0.84848485 0.93333333]
|
|
|
|
mean value: 0.9011036612397455
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.90625 0.96969697 0.90625 0.90909091 0.85714286 0.83333333
|
|
0.80645161 0.90909091 0.8 0.96551724]
|
|
|
|
mean value: 0.8862823832637514
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.90625 1. 0.90625 0.9375 0.9375 0.9375
|
|
0.78125 0.96774194 0.90322581 0.90322581]
|
|
|
|
mean value: 0.9180443548387097
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.90625 0.984375 0.90625 0.921875 0.88810484 0.87197581
|
|
0.79385081 0.93699597 0.8422379 0.9359879 ]
|
|
|
|
mean value: 0.8987903225806452
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.82857143 0.96969697 0.82857143 0.85714286 0.81081081 0.78947368
|
|
0.65789474 0.88235294 0.73684211 0.875 ]
|
|
|
|
mean value: 0.8236356962285755
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.66
|
|
|
|
Accuracy on Blind test: 0.83
|
|
|
|
Model_name: Random Forest2
|
|
Model func: RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000...05', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10,
|
|
oob_score=True, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.02598143 1.0367229 0.99699593 0.98521328 1.00284791 0.98103404
|
|
1.01575923 1.06608391 1.04886723 1.05637908]
|
|
|
|
mean value: 1.0215884923934937
|
|
|
|
key: score_time
|
|
value: [0.26507068 0.28202915 0.27169871 0.27197194 0.25234628 0.2420013
|
|
0.26003218 0.26900506 0.314147 0.15186691]
|
|
|
|
mean value: 0.25801692008972166
|
|
|
|
key: test_mcc
|
|
value: [0.78470603 0.9375 0.81409158 0.84416229 0.81092385 0.78719616
|
|
0.61982085 0.78822824 0.68865372 0.87462485]
|
|
|
|
mean value: 0.7949907576810107
|
|
|
|
key: train_mcc
|
|
value: [0.93704978 0.92659532 0.9340293 0.93064988 0.94479673 0.92690929
|
|
0.93015524 0.9341391 0.94115006 0.92350399]
|
|
|
|
mean value: 0.9328978692400133
|
|
|
|
key: test_accuracy
|
|
value: [0.890625 0.96875 0.90625 0.921875 0.9047619 0.88888889
|
|
0.80952381 0.88888889 0.84126984 0.93650794]
|
|
|
|
mean value: 0.895734126984127
|
|
|
|
key: train_accuracy
|
|
value: [0.96842105 0.96315789 0.96666667 0.96491228 0.97197898 0.96322242
|
|
0.96497373 0.96672504 0.97022767 0.9614711 ]
|
|
|
|
mean value: 0.9661756843948751
|
|
|
|
key: test_fscore
|
|
value: [0.8852459 0.96875 0.90322581 0.92307692 0.90909091 0.89855072
|
|
0.81818182 0.89552239 0.84848485 0.93333333]
|
|
|
|
mean value: 0.8983462652956172
|
|
|
|
key: train_fscore
|
|
value: [0.96875 0.96360485 0.96729776 0.96563574 0.97250859 0.96373057
|
|
0.96527778 0.96740995 0.97084048 0.96219931]
|
|
|
|
mean value: 0.9667255034318909
|
|
|
|
key: test_precision
|
|
value: [0.93103448 0.96875 0.93333333 0.90909091 0.88235294 0.83783784
|
|
0.79411765 0.83333333 0.8 0.96551724]
|
|
|
|
mean value: 0.8855367725968639
|
|
|
|
key: train_precision
|
|
value: [0.95876289 0.95205479 0.94932432 0.94612795 0.95286195 0.94897959
|
|
0.95532646 0.94949495 0.95286195 0.94594595]
|
|
|
|
mean value: 0.9511740805053392
|
|
|
|
key: test_recall
|
|
value: [0.84375 0.96875 0.875 0.9375 0.9375 0.96875
|
|
0.84375 0.96774194 0.90322581 0.90322581]
|
|
|
|
mean value: 0.9149193548387097
|
|
|
|
key: train_recall
|
|
value: [0.97894737 0.9754386 0.98596491 0.98596491 0.99298246 0.97894737
|
|
0.9754386 0.98601399 0.98951049 0.97902098]
|
|
|
|
mean value: 0.982822966507177
|
|
|
|
key: test_roc_auc
|
|
value: [0.890625 0.96875 0.90625 0.921875 0.90423387 0.88760081
|
|
0.80897177 0.89012097 0.8422379 0.9359879 ]
|
|
|
|
mean value: 0.8956653225806451
|
|
|
|
key: train_roc_auc
|
|
value: [0.96842105 0.96315789 0.96666667 0.96491228 0.9720157 0.96324991
|
|
0.96499203 0.9666912 0.97019384 0.96144031]
|
|
|
|
mean value: 0.9661740890688258
|
|
|
|
key: test_jcc
|
|
value: [0.79411765 0.93939394 0.82352941 0.85714286 0.83333333 0.81578947
|
|
0.69230769 0.81081081 0.73684211 0.875 ]
|
|
|
|
mean value: 0.817826727075953
|
|
|
|
key: train_jcc
|
|
value: [0.93939394 0.92976589 0.93666667 0.93355482 0.94648829 0.93
|
|
0.93288591 0.93687708 0.94333333 0.92715232]
|
|
|
|
mean value: 0.9356118237604717
|
|
|
|
MCC on Blind test: 0.69
|
|
|
|
Accuracy on Blind test: 0.85
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.0240097 0.0106535 0.01081276 0.01077318 0.01083565 0.01066613
|
|
0.01063871 0.01072431 0.01179624 0.01075387]
|
|
|
|
mean value: 0.012166404724121093
|
|
|
|
key: score_time
|
|
value: [0.00999904 0.0090971 0.00928092 0.009094 0.00921273 0.00915384
|
|
0.00908613 0.00915599 0.00989032 0.00916982]
|
|
|
|
mean value: 0.00931398868560791
|
|
|
|
key: test_mcc
|
|
value: [0.53150959 0.50395263 0.21971769 0.44539933 0.49493401 0.52419355
|
|
0.46146899 0.37363667 0.33366935 0.58770161]
|
|
|
|
mean value: 0.4476183430779835
|
|
|
|
key: train_mcc
|
|
value: [0.55284089 0.54886043 0.51932702 0.52686418 0.54489338 0.48908468
|
|
0.55937838 0.5535563 0.52463264 0.52543149]
|
|
|
|
mean value: 0.5344869395135066
|
|
|
|
key: test_accuracy
|
|
value: [0.765625 0.75 0.609375 0.71875 0.74603175 0.76190476
|
|
0.73015873 0.68253968 0.66666667 0.79365079]
|
|
|
|
mean value: 0.7224702380952381
|
|
|
|
key: train_accuracy
|
|
value: [0.7754386 0.77368421 0.75964912 0.76315789 0.77232925 0.74430823
|
|
0.7793345 0.77583187 0.76182137 0.76182137]
|
|
|
|
mean value: 0.7667376409500107
|
|
|
|
key: test_fscore
|
|
value: [0.76190476 0.76470588 0.59016393 0.74285714 0.76470588 0.76190476
|
|
0.74626866 0.70588235 0.66666667 0.79365079]
|
|
|
|
mean value: 0.7298710835773833
|
|
|
|
key: train_fscore
|
|
value: [0.78451178 0.78172589 0.7609075 0.76843911 0.77508651 0.74914089
|
|
0.78424658 0.7852349 0.76949153 0.77181208]
|
|
|
|
mean value: 0.7730596764554477
|
|
|
|
key: test_precision
|
|
value: [0.77419355 0.72222222 0.62068966 0.68421053 0.72222222 0.77419355
|
|
0.71428571 0.64864865 0.65625 0.78125 ]
|
|
|
|
mean value: 0.7098166085641204
|
|
|
|
key: train_precision
|
|
value: [0.75404531 0.75490196 0.75694444 0.75167785 0.76450512 0.73400673
|
|
0.76588629 0.75483871 0.74671053 0.74193548]
|
|
|
|
mean value: 0.7525452425971371
|
|
|
|
key: test_recall
|
|
value: [0.75 0.8125 0.5625 0.8125 0.8125 0.75
|
|
0.78125 0.77419355 0.67741935 0.80645161]
|
|
|
|
mean value: 0.7539314516129032
|
|
|
|
key: train_recall
|
|
value: [0.81754386 0.81052632 0.76491228 0.78596491 0.78596491 0.76491228
|
|
0.80350877 0.81818182 0.79370629 0.8041958 ]
|
|
|
|
mean value: 0.794941724941725
|
|
|
|
key: test_roc_auc
|
|
value: [0.765625 0.75 0.609375 0.71875 0.74495968 0.76209677
|
|
0.72933468 0.68397177 0.66683468 0.79385081]
|
|
|
|
mean value: 0.7224798387096774
|
|
|
|
key: train_roc_auc
|
|
value: [0.7754386 0.77368421 0.75964912 0.76315789 0.77235309 0.74434425
|
|
0.77937676 0.77575758 0.76176543 0.76174702]
|
|
|
|
mean value: 0.766727395411606
|
|
|
|
key: test_jcc
|
|
value: [0.61538462 0.61904762 0.41860465 0.59090909 0.61904762 0.61538462
|
|
0.5952381 0.54545455 0.5 0.65789474]
|
|
|
|
mean value: 0.5776965588471097
|
|
|
|
key: train_jcc
|
|
value: [0.64542936 0.64166667 0.61408451 0.62395543 0.63276836 0.5989011
|
|
0.64507042 0.64640884 0.62534435 0.6284153 ]
|
|
|
|
mean value: 0.6302044344305446
|
|
|
|
MCC on Blind test: 0.36
|
|
|
|
Accuracy on Blind test: 0.69
|
|
|
|
Model_name: XGBoost
|
|
Model func: XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000...
|
|
interaction_constraints=None, learning_rate=None,
|
|
max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan,
|
|
monotone_constraints=None, n_estimators=100,
|
|
n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None,
|
|
reg_lambda=None, scale_pos_weight=None,
|
|
subsample=None, tree_method=None,
|
|
use_label_encoder=False,
|
|
validate_parameters=None, verbosity=0))])
|
|
|
|
key: fit_time
|
|
value: [0.10538983 0.1000948 0.09575319 0.09839797 0.26285148 0.09676743
|
|
0.10008001 0.10182142 0.10213518 0.10413456]
|
|
|
|
mean value: 0.11674258708953858
|
|
|
|
key: score_time
|
|
value: [0.01122642 0.01133132 0.0112431 0.01126313 0.01146555 0.01128602
|
|
0.01158309 0.01123977 0.01124072 0.01122522]
|
|
|
|
mean value: 0.011310434341430664
|
|
|
|
key: test_mcc
|
|
value: [0.875 0.96922337 0.84416229 0.84416229 0.90524194 0.78094752
|
|
0.68415777 0.84530217 0.68865372 1. ]
|
|
|
|
mean value: 0.8436851064509476
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.9375 0.984375 0.921875 0.921875 0.95238095 0.88888889
|
|
0.84126984 0.92063492 0.84126984 1. ]
|
|
|
|
mean value: 0.9210069444444444
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.9375 0.98461538 0.92063492 0.92307692 0.95238095 0.89552239
|
|
0.83870968 0.92307692 0.84848485 1. ]
|
|
|
|
mean value: 0.9224002017749009
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.9375 0.96969697 0.93548387 0.90909091 0.96774194 0.85714286
|
|
0.86666667 0.88235294 0.8 1. ]
|
|
|
|
mean value: 0.9125676150225486
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.9375 1. 0.90625 0.9375 0.9375 0.9375
|
|
0.8125 0.96774194 0.90322581 1. ]
|
|
|
|
mean value: 0.9339717741935484
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.9375 0.984375 0.921875 0.921875 0.95262097 0.88810484
|
|
0.84173387 0.92137097 0.8422379 1. ]
|
|
|
|
mean value: 0.9211693548387097
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.88235294 0.96969697 0.85294118 0.85714286 0.90909091 0.81081081
|
|
0.72222222 0.85714286 0.73684211 1. ]
|
|
|
|
mean value: 0.8598242849016843
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.74
|
|
|
|
Accuracy on Blind test: 0.87
|
|
|
|
Model_name: LDA
|
|
Model func: LinearDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LinearDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.04755354 0.09139705 0.07708144 0.09074998 0.06661797 0.10064197
|
|
0.05450773 0.06642532 0.05162716 0.04805136]
|
|
|
|
mean value: 0.06946535110473633
|
|
|
|
key: score_time
|
|
value: [0.01876473 0.01898789 0.01507235 0.01241088 0.01922154 0.01299763
|
|
0.02347302 0.01298523 0.02243018 0.02135372]
|
|
|
|
mean value: 0.017769718170166017
|
|
|
|
key: test_mcc
|
|
value: [0.47082362 0.67253825 0.438357 0.73658951 0.6385282 0.68740835
|
|
0.71443023 0.66853948 0.51058887 0.68245968]
|
|
|
|
mean value: 0.62202631854168
|
|
|
|
key: train_mcc
|
|
value: [0.82329065 0.83911319 0.81853204 0.81252947 0.84607646 0.83921607
|
|
0.80796777 0.81578865 0.83960889 0.82280711]
|
|
|
|
mean value: 0.8264930306653335
|
|
|
|
key: test_accuracy
|
|
value: [0.734375 0.828125 0.71875 0.859375 0.80952381 0.84126984
|
|
0.85714286 0.82539683 0.74603175 0.84126984]
|
|
|
|
mean value: 0.806125992063492
|
|
|
|
key: train_accuracy
|
|
value: [0.91052632 0.91929825 0.90877193 0.90526316 0.92294221 0.91943958
|
|
0.90367776 0.90718039 0.91943958 0.91068301]
|
|
|
|
mean value: 0.9127222171014225
|
|
|
|
key: test_fscore
|
|
value: [0.72131148 0.84507042 0.70967742 0.84210526 0.83333333 0.85294118
|
|
0.86153846 0.84057971 0.77142857 0.83870968]
|
|
|
|
mean value: 0.8116695510793017
|
|
|
|
key: train_fscore
|
|
value: [0.91370558 0.92068966 0.9109589 0.90847458 0.92361111 0.92041522
|
|
0.90533563 0.91001698 0.92123288 0.91341256]
|
|
|
|
mean value: 0.9147853101869589
|
|
|
|
key: test_precision
|
|
value: [0.75862069 0.76923077 0.73333333 0.96 0.75 0.80555556
|
|
0.84848485 0.76315789 0.69230769 0.83870968]
|
|
|
|
mean value: 0.7919400460723568
|
|
|
|
key: train_precision
|
|
value: [0.88235294 0.90508475 0.88963211 0.87868852 0.91408935 0.90784983
|
|
0.88851351 0.88448845 0.90268456 0.88778878]
|
|
|
|
mean value: 0.8941172799978007
|
|
|
|
key: test_recall
|
|
value: [0.6875 0.9375 0.6875 0.75 0.9375 0.90625
|
|
0.875 0.93548387 0.87096774 0.83870968]
|
|
|
|
mean value: 0.8426411290322581
|
|
|
|
key: train_recall
|
|
value: [0.94736842 0.93684211 0.93333333 0.94035088 0.93333333 0.93333333
|
|
0.92280702 0.93706294 0.94055944 0.94055944]
|
|
|
|
mean value: 0.936555023923445
|
|
|
|
key: test_roc_auc
|
|
value: [0.734375 0.828125 0.71875 0.859375 0.80745968 0.84022177
|
|
0.85685484 0.82711694 0.74798387 0.84122984]
|
|
|
|
mean value: 0.8061491935483871
|
|
|
|
key: train_roc_auc
|
|
value: [0.91052632 0.91929825 0.90877193 0.90526316 0.92296037 0.91946387
|
|
0.9037112 0.90712796 0.91940253 0.9106306 ]
|
|
|
|
mean value: 0.9127156177156177
|
|
|
|
key: test_jcc
|
|
value: [0.56410256 0.73170732 0.55 0.72727273 0.71428571 0.74358974
|
|
0.75675676 0.725 0.62790698 0.72222222]
|
|
|
|
mean value: 0.6862844022047085
|
|
|
|
key: train_jcc
|
|
value: [0.8411215 0.85303514 0.83647799 0.83229814 0.85806452 0.8525641
|
|
0.82704403 0.83489097 0.85396825 0.840625 ]
|
|
|
|
mean value: 0.8430089626715126
|
|
|
|
MCC on Blind test: 0.45
|
|
|
|
Accuracy on Blind test: 0.73
|
|
|
|
Model_name: Multinomial
|
|
Model func: MultinomialNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MultinomialNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01229477 0.01073599 0.01056314 0.01035595 0.01048636 0.01037979
|
|
0.01035643 0.0102849 0.01053333 0.01048923]
|
|
|
|
mean value: 0.010647988319396973
|
|
|
|
key: score_time
|
|
value: [0.010355 0.0092082 0.00895095 0.00888205 0.00895739 0.00913954
|
|
0.00916409 0.00913596 0.00908208 0.00891948]
|
|
|
|
mean value: 0.009179472923278809
|
|
|
|
key: test_mcc
|
|
value: [0.51639778 0.50395263 0.40644851 0.65657067 0.57258185 0.59372402
|
|
0.4647426 0.37363667 0.4969666 0.68245968]
|
|
|
|
mean value: 0.5267481010393584
|
|
|
|
key: train_mcc
|
|
value: [0.54984141 0.56727781 0.53204317 0.56144502 0.55001067 0.51487506
|
|
0.5477723 0.5887123 0.58703803 0.55797288]
|
|
|
|
mean value: 0.5556988650980035
|
|
|
|
key: test_accuracy
|
|
value: [0.75 0.75 0.703125 0.828125 0.77777778 0.79365079
|
|
0.73015873 0.68253968 0.74603175 0.84126984]
|
|
|
|
mean value: 0.7602678571428572
|
|
|
|
key: train_accuracy
|
|
value: [0.77368421 0.78245614 0.76491228 0.77894737 0.77408056 0.75656743
|
|
0.77232925 0.79334501 0.7915937 0.77758319]
|
|
|
|
mean value: 0.7765499124343258
|
|
|
|
key: test_fscore
|
|
value: [0.77777778 0.76470588 0.70769231 0.83076923 0.80555556 0.8115942
|
|
0.75362319 0.70588235 0.75757576 0.83870968]
|
|
|
|
mean value: 0.7753885933388449
|
|
|
|
key: train_fscore
|
|
value: [0.7839196 0.79194631 0.77516779 0.79069767 0.78246206 0.76559865
|
|
0.78333333 0.80201342 0.80330579 0.78868552]
|
|
|
|
mean value: 0.7867130140033903
|
|
|
|
key: test_precision
|
|
value: [0.7 0.72222222 0.6969697 0.81818182 0.725 0.75675676
|
|
0.7027027 0.64864865 0.71428571 0.83870968]
|
|
|
|
mean value: 0.7323477237186915
|
|
|
|
key: train_precision
|
|
value: [0.75 0.75884244 0.74276527 0.75078864 0.75324675 0.73701299
|
|
0.74603175 0.77096774 0.76175549 0.75238095]
|
|
|
|
mean value: 0.7523792027076264
|
|
|
|
key: test_recall
|
|
value: [0.875 0.8125 0.71875 0.84375 0.90625 0.875
|
|
0.8125 0.77419355 0.80645161 0.83870968]
|
|
|
|
mean value: 0.8263104838709677
|
|
|
|
key: train_recall
|
|
value: [0.82105263 0.82807018 0.81052632 0.83508772 0.81403509 0.79649123
|
|
0.8245614 0.83566434 0.84965035 0.82867133]
|
|
|
|
mean value: 0.8243810575389523
|
|
|
|
key: test_roc_auc
|
|
value: [0.75 0.75 0.703125 0.828125 0.77570565 0.79233871
|
|
0.72883065 0.68397177 0.74697581 0.84122984]
|
|
|
|
mean value: 0.7600302419354839
|
|
|
|
key: train_roc_auc
|
|
value: [0.77368421 0.78245614 0.76491228 0.77894737 0.77415041 0.75663722
|
|
0.77242056 0.79327076 0.79149184 0.77749356]
|
|
|
|
mean value: 0.7765464360201202
|
|
|
|
key: test_jcc
|
|
value: [0.63636364 0.61904762 0.54761905 0.71052632 0.6744186 0.68292683
|
|
0.60465116 0.54545455 0.6097561 0.72222222]
|
|
|
|
mean value: 0.6352986080767673
|
|
|
|
key: train_jcc
|
|
value: [0.6446281 0.65555556 0.63287671 0.65384615 0.64265928 0.62021858
|
|
0.64383562 0.66946779 0.67127072 0.6510989 ]
|
|
|
|
mean value: 0.6485457402801543
|
|
|
|
MCC on Blind test: 0.42
|
|
|
|
Accuracy on Blind test: 0.71
|
|
|
|
Model_name: Passive Aggresive
|
|
Model func: PassiveAggressiveClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
PassiveAggressiveClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01563811 0.02035022 0.02358508 0.02702594 0.0256815 0.02155948
|
|
0.02621078 0.02639723 0.02369452 0.02356243]
|
|
|
|
mean value: 0.023370528221130372
|
|
|
|
key: score_time
|
|
value: [0.01053476 0.01212144 0.01214385 0.0119803 0.01212549 0.01194334
|
|
0.01219034 0.01225972 0.01224947 0.01227522]
|
|
|
|
mean value: 0.011982393264770509
|
|
|
|
key: test_mcc
|
|
value: [0.55603844 0.6011334 0.4163332 0.75146915 0.41887185 0.56796183
|
|
0.5180609 0.50132936 0.46068548 0.56086231]
|
|
|
|
mean value: 0.5352745927373167
|
|
|
|
key: train_mcc
|
|
value: [0.54890295 0.70877629 0.70214646 0.73373289 0.72976328 0.45009303
|
|
0.58267032 0.66545422 0.74097799 0.67202979]
|
|
|
|
mean value: 0.6534547204028045
|
|
|
|
key: test_accuracy
|
|
value: [0.75 0.796875 0.703125 0.875 0.68253968 0.74603175
|
|
0.71428571 0.71428571 0.73015873 0.77777778]
|
|
|
|
mean value: 0.7490079365079365
|
|
|
|
key: train_accuracy
|
|
value: [0.73684211 0.85438596 0.84912281 0.86140351 0.85464098 0.67250438
|
|
0.75656743 0.81085814 0.8704028 0.81611208]
|
|
|
|
mean value: 0.8082840200325683
|
|
|
|
key: test_fscore
|
|
value: [0.79487179 0.77966102 0.66666667 0.87878788 0.75 0.8
|
|
0.7804878 0.76923077 0.73015873 0.78787879]
|
|
|
|
mean value: 0.7737743449421829
|
|
|
|
key: train_fscore
|
|
value: [0.78991597 0.85464098 0.84074074 0.8723748 0.86970173 0.75166003
|
|
0.80283688 0.83976261 0.86925795 0.84304933]
|
|
|
|
mean value: 0.8333941007922129
|
|
|
|
key: test_precision
|
|
value: [0.67391304 0.85185185 0.76 0.85294118 0.625 0.66666667
|
|
0.64 0.63829787 0.71875 0.74285714]
|
|
|
|
mean value: 0.7170277753664936
|
|
|
|
key: train_precision
|
|
value: [0.65734266 0.85314685 0.89019608 0.80838323 0.78693182 0.60470085
|
|
0.67380952 0.72938144 0.87857143 0.73629243]
|
|
|
|
mean value: 0.7618756319214844
|
|
|
|
key: test_recall
|
|
value: [0.96875 0.71875 0.59375 0.90625 0.9375 1.
|
|
1. 0.96774194 0.74193548 0.83870968]
|
|
|
|
mean value: 0.8673387096774193
|
|
|
|
key: train_recall
|
|
value: [0.98947368 0.85614035 0.79649123 0.94736842 0.97192982 0.99298246
|
|
0.99298246 0.98951049 0.86013986 0.98601399]
|
|
|
|
mean value: 0.9383032756716967
|
|
|
|
key: test_roc_auc
|
|
value: [0.75 0.796875 0.703125 0.875 0.67842742 0.74193548
|
|
0.70967742 0.71824597 0.73034274 0.77872984]
|
|
|
|
mean value: 0.7482358870967742
|
|
|
|
key: train_roc_auc
|
|
value: [0.73684211 0.85438596 0.84912281 0.86140351 0.85484603 0.67306465
|
|
0.75698074 0.81054472 0.87042081 0.81581401]
|
|
|
|
mean value: 0.8083425346583241
|
|
|
|
key: test_jcc
|
|
value: [0.65957447 0.63888889 0.5 0.78378378 0.6 0.66666667
|
|
0.64 0.625 0.575 0.65 ]
|
|
|
|
mean value: 0.6338913807424446
|
|
|
|
key: train_jcc
|
|
value: [0.65277778 0.74617737 0.72523962 0.77363897 0.76944444 0.60212766
|
|
0.67061611 0.72378517 0.76875 0.72868217]
|
|
|
|
mean value: 0.7161239287449186
|
|
|
|
MCC on Blind test: 0.5
|
|
|
|
Accuracy on Blind test: 0.7
|
|
|
|
Model_name: Stochastic GDescent
|
|
Model func: SGDClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SGDClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.03134894 0.02866793 0.02440834 0.02831697 0.03325534 0.02925897
|
|
0.0286274 0.02725267 0.02873611 0.02479219]
|
|
|
|
mean value: 0.02846648693084717
|
|
|
|
key: score_time
|
|
value: [0.01226878 0.01199651 0.01198626 0.01231027 0.01204038 0.012254
|
|
0.01232457 0.01247358 0.01225519 0.01198483]
|
|
|
|
mean value: 0.012189435958862304
|
|
|
|
key: test_mcc
|
|
value: [0.49916874 0.64549722 0.31311215 0.78470603 0.54443762 0.62939541
|
|
0.59049817 0.56126657 0.36114822 0.49283288]
|
|
|
|
mean value: 0.5422063009133407
|
|
|
|
key: train_mcc
|
|
value: [0.62861856 0.78488853 0.75958458 0.78488853 0.78400945 0.7560971
|
|
0.75332186 0.7666719 0.57667378 0.48335361]
|
|
|
|
mean value: 0.7078107890020772
|
|
|
|
key: test_accuracy
|
|
value: [0.734375 0.8125 0.65625 0.890625 0.76190476 0.80952381
|
|
0.79365079 0.76190476 0.65079365 0.6984127 ]
|
|
|
|
mean value: 0.7569940476190476
|
|
|
|
key: train_accuracy
|
|
value: [0.79473684 0.88947368 0.87894737 0.88947368 0.88966725 0.87390543
|
|
0.87565674 0.87565674 0.76007005 0.69702277]
|
|
|
|
mean value: 0.8424610563185547
|
|
|
|
key: test_fscore
|
|
value: [0.67924528 0.83333333 0.64516129 0.8852459 0.79452055 0.82857143
|
|
0.78688525 0.79452055 0.5 0.55813953]
|
|
|
|
mean value: 0.7305623113561326
|
|
|
|
key: train_fscore
|
|
value: [0.75159236 0.89586777 0.88285229 0.89586777 0.89517471 0.88235294
|
|
0.87067395 0.88712242 0.69487751 0.57493857]
|
|
|
|
mean value: 0.8231320285575312
|
|
|
|
key: test_precision
|
|
value: [0.85714286 0.75 0.66666667 0.93103448 0.70731707 0.76315789
|
|
0.82758621 0.69047619 0.84615385 1. ]
|
|
|
|
mean value: 0.8039535218002306
|
|
|
|
key: train_precision
|
|
value: [0.9516129 0.846875 0.85526316 0.846875 0.85126582 0.82568807
|
|
0.90530303 0.81341108 0.95705521 0.96694215]
|
|
|
|
mean value: 0.8820291429804338
|
|
|
|
key: test_recall
|
|
value: [0.5625 0.9375 0.625 0.84375 0.90625 0.90625
|
|
0.75 0.93548387 0.35483871 0.38709677]
|
|
|
|
mean value: 0.7208669354838709
|
|
|
|
key: train_recall
|
|
value: [0.62105263 0.95087719 0.9122807 0.95087719 0.94385965 0.94736842
|
|
0.83859649 0.97552448 0.54545455 0.40909091]
|
|
|
|
mean value: 0.8094982210771684
|
|
|
|
key: test_roc_auc
|
|
value: [0.734375 0.8125 0.65625 0.890625 0.75957661 0.80796371
|
|
0.79435484 0.76461694 0.64616935 0.69354839]
|
|
|
|
mean value: 0.7559979838709677
|
|
|
|
key: train_roc_auc
|
|
value: [0.79473684 0.88947368 0.87894737 0.88947368 0.88976199 0.87403386
|
|
0.87559195 0.87548154 0.76044657 0.69752791]
|
|
|
|
mean value: 0.8425475401791191
|
|
|
|
key: test_jcc
|
|
value: [0.51428571 0.71428571 0.47619048 0.79411765 0.65909091 0.70731707
|
|
0.64864865 0.65909091 0.33333333 0.38709677]
|
|
|
|
mean value: 0.5893457199348808
|
|
|
|
key: train_jcc
|
|
value: [0.60204082 0.81137725 0.79027356 0.81137725 0.81024096 0.78947368
|
|
0.77096774 0.79714286 0.53242321 0.40344828]
|
|
|
|
mean value: 0.7118765594772982
|
|
|
|
MCC on Blind test: 0.51
|
|
|
|
Accuracy on Blind test: 0.73
|
|
|
|
Model_name: AdaBoost Classifier
|
|
Model func: AdaBoostClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', AdaBoostClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.21471906 0.19875097 0.20195174 0.19802856 0.19768119 0.19887018
|
|
0.1979003 0.2040205 0.20287895 0.20416617]
|
|
|
|
mean value: 0.2018967628479004
|
|
|
|
key: score_time
|
|
value: [0.01584053 0.01595926 0.01975155 0.01568866 0.01623011 0.015908
|
|
0.01558733 0.0160017 0.01708651 0.01687288]
|
|
|
|
mean value: 0.016492652893066406
|
|
|
|
key: test_mcc
|
|
value: [0.81409158 0.875 0.78163175 0.84416229 0.87487431 0.78094752
|
|
0.61982085 0.73343622 0.71790017 0.8415746 ]
|
|
|
|
mean value: 0.7883439285886955
|
|
|
|
key: train_mcc
|
|
value: [0.93704978 0.94406888 0.95108798 0.95097086 0.94760737 0.94416837
|
|
0.96862577 0.94404909 0.95117136 0.95817844]
|
|
|
|
mean value: 0.949697790480524
|
|
|
|
key: test_accuracy
|
|
value: [0.90625 0.9375 0.890625 0.921875 0.93650794 0.88888889
|
|
0.80952381 0.85714286 0.85714286 0.92063492]
|
|
|
|
mean value: 0.892609126984127
|
|
|
|
key: train_accuracy
|
|
value: [0.96842105 0.97192982 0.9754386 0.9754386 0.9737303 0.97197898
|
|
0.98423818 0.97197898 0.97548161 0.97898424]
|
|
|
|
mean value: 0.9747620364396105
|
|
|
|
key: test_fscore
|
|
value: [0.90909091 0.9375 0.89230769 0.92307692 0.93548387 0.89552239
|
|
0.81818182 0.86956522 0.86153846 0.91803279]
|
|
|
|
mean value: 0.8960300067499798
|
|
|
|
key: train_fscore
|
|
value: [0.96875 0.97222222 0.97569444 0.97560976 0.97391304 0.97222222
|
|
0.98434783 0.97222222 0.97577855 0.97923875]
|
|
|
|
mean value: 0.9749999037811952
|
|
|
|
key: test_precision
|
|
value: [0.88235294 0.9375 0.87878788 0.90909091 0.96666667 0.85714286
|
|
0.79411765 0.78947368 0.82352941 0.93333333]
|
|
|
|
mean value: 0.8771995329232172
|
|
|
|
key: train_precision
|
|
value: [0.95876289 0.96219931 0.96563574 0.96885813 0.96551724 0.96219931
|
|
0.97586207 0.96551724 0.96575342 0.96917808]
|
|
|
|
mean value: 0.9659483440920449
|
|
|
|
key: test_recall
|
|
value: [0.9375 0.9375 0.90625 0.9375 0.90625 0.9375
|
|
0.84375 0.96774194 0.90322581 0.90322581]
|
|
|
|
mean value: 0.9180443548387097
|
|
|
|
key: train_recall
|
|
value: [0.97894737 0.98245614 0.98596491 0.98245614 0.98245614 0.98245614
|
|
0.99298246 0.97902098 0.98601399 0.98951049]
|
|
|
|
mean value: 0.9842264752791068
|
|
|
|
key: test_roc_auc
|
|
value: [0.90625 0.9375 0.890625 0.921875 0.93699597 0.88810484
|
|
0.80897177 0.85887097 0.8578629 0.9203629 ]
|
|
|
|
mean value: 0.8927419354838709
|
|
|
|
key: train_roc_auc
|
|
value: [0.96842105 0.97192982 0.9754386 0.9754386 0.97374555 0.9719973
|
|
0.98425347 0.97196663 0.97546313 0.97896577]
|
|
|
|
mean value: 0.9747619923935713
|
|
|
|
key: test_jcc
|
|
value: [0.83333333 0.88235294 0.80555556 0.85714286 0.87878788 0.81081081
|
|
0.69230769 0.76923077 0.75675676 0.84848485]
|
|
|
|
mean value: 0.8134763443586973
|
|
|
|
key: train_jcc
|
|
value: [0.93939394 0.94594595 0.95254237 0.95238095 0.94915254 0.94594595
|
|
0.96917808 0.94594595 0.9527027 0.95932203]
|
|
|
|
mean value: 0.9512510463659756
|
|
|
|
MCC on Blind test: 0.67
|
|
|
|
Accuracy on Blind test: 0.84
|
|
|
|
Model_name: Bagging Classifier
|
|
Model func: BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
BaggingClassifier(n_jobs=10, oob_score=True,
|
|
random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.08887267 0.07713723 0.12714386 0.11457253 0.09450245 0.09540319
|
|
0.12129784 0.11899137 0.11339378 0.13355446]
|
|
|
|
mean value: 0.1084869384765625
|
|
|
|
key: score_time
|
|
value: [0.02319431 0.02583909 0.04017091 0.02797508 0.02424788 0.02266908
|
|
0.04040813 0.02550125 0.03998375 0.0358336 ]
|
|
|
|
mean value: 0.030582308769226074
|
|
|
|
key: test_mcc
|
|
value: [0.8125 0.90669283 0.75 0.81409158 0.87298387 0.72270545
|
|
0.68245968 0.84530217 0.68415777 0.93649194]
|
|
|
|
mean value: 0.8027385275255405
|
|
|
|
key: train_mcc
|
|
value: [0.98596491 0.98606204 0.98947978 0.98947978 0.98949809 0.98954653
|
|
0.99650345 0.98598945 0.98263804 0.98601347]
|
|
|
|
mean value: 0.9881175538085738
|
|
|
|
key: test_accuracy
|
|
value: [0.90625 0.953125 0.875 0.90625 0.93650794 0.85714286
|
|
0.84126984 0.92063492 0.84126984 0.96825397]
|
|
|
|
mean value: 0.9005704365079366
|
|
|
|
key: train_accuracy
|
|
value: [0.99298246 0.99298246 0.99473684 0.99473684 0.99474606 0.99474606
|
|
0.99824869 0.99299475 0.99124343 0.99299475]
|
|
|
|
mean value: 0.9940412326788951
|
|
|
|
key: test_fscore
|
|
value: [0.90625 0.95238095 0.875 0.90909091 0.9375 0.86956522
|
|
0.84375 0.92307692 0.84375 0.96774194]
|
|
|
|
mean value: 0.902810593742396
|
|
|
|
key: train_fscore
|
|
value: [0.99298246 0.99303136 0.99474606 0.99472759 0.99472759 0.99470899
|
|
0.99824253 0.99300699 0.99118166 0.99303136]
|
|
|
|
mean value: 0.994038659430934
|
|
|
|
key: test_precision
|
|
value: [0.90625 0.96774194 0.875 0.88235294 0.9375 0.81081081
|
|
0.84375 0.88235294 0.81818182 0.96774194]
|
|
|
|
mean value: 0.8891682382313312
|
|
|
|
key: train_precision
|
|
value: [0.99298246 0.98615917 0.99300699 0.99647887 0.99647887 1.
|
|
1. 0.99300699 1. 0.98958333]
|
|
|
|
mean value: 0.9947696691516716
|
|
|
|
key: test_recall
|
|
value: [0.90625 0.9375 0.875 0.9375 0.9375 0.9375
|
|
0.84375 0.96774194 0.87096774 0.96774194]
|
|
|
|
mean value: 0.9181451612903226
|
|
|
|
key: train_recall
|
|
value: [0.99298246 1. 0.99649123 0.99298246 0.99298246 0.98947368
|
|
0.99649123 0.99300699 0.98251748 0.9965035 ]
|
|
|
|
mean value: 0.9933431480799901
|
|
|
|
key: test_roc_auc
|
|
value: [0.90625 0.953125 0.875 0.90625 0.93649194 0.85584677
|
|
0.84122984 0.92137097 0.84173387 0.96824597]
|
|
|
|
mean value: 0.9005544354838709
|
|
|
|
key: train_roc_auc
|
|
value: [0.99298246 0.99298246 0.99473684 0.99473684 0.99474298 0.99473684
|
|
0.99824561 0.99299472 0.99125874 0.99298859]
|
|
|
|
mean value: 0.9940406085142927
|
|
|
|
key: test_jcc
|
|
value: [0.82857143 0.90909091 0.77777778 0.83333333 0.88235294 0.76923077
|
|
0.72972973 0.85714286 0.72972973 0.9375 ]
|
|
|
|
mean value: 0.8254459475783005
|
|
|
|
key: train_jcc
|
|
value: [0.98606272 0.98615917 0.98954704 0.98951049 0.98951049 0.98947368
|
|
0.99649123 0.98611111 0.98251748 0.98615917]
|
|
|
|
mean value: 0.9881542580128181
|
|
|
|
MCC on Blind test: 0.64
|
|
|
|
Accuracy on Blind test: 0.82
|
|
|
|
Model_name: Gaussian Process
|
|
Model func: GaussianProcessClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianProcessClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.25840831 0.23501658 0.19894886 0.22410798 0.22995543 0.23805594
|
|
0.23612881 0.2686615 0.23128748 0.22726607]
|
|
|
|
mean value: 0.2347836971282959
|
|
|
|
key: score_time
|
|
value: [0.02715898 0.01644039 0.02714419 0.02729154 0.02735209 0.02731228
|
|
0.02736306 0.02728319 0.02738619 0.02738714]
|
|
|
|
mean value: 0.02621190547943115
|
|
|
|
key: test_mcc
|
|
value: [0.56360186 0.69293487 0.4375 0.65657067 0.40473508 0.5253647
|
|
0.52371369 0.48255984 0.48255984 0.58728587]
|
|
|
|
mean value: 0.5356826409991767
|
|
|
|
key: train_mcc
|
|
value: [0.96169363 0.96512618 0.96857012 0.9615515 0.95475466 0.96176174
|
|
0.96556818 0.95493785 0.96862386 0.95493785]
|
|
|
|
mean value: 0.9617525571921755
|
|
|
|
key: test_accuracy
|
|
value: [0.78125 0.84375 0.71875 0.828125 0.6984127 0.76190476
|
|
0.76190476 0.73015873 0.73015873 0.79365079]
|
|
|
|
mean value: 0.7648065476190475
|
|
|
|
key: train_accuracy
|
|
value: [0.98070175 0.98245614 0.98421053 0.98070175 0.97723292 0.98073555
|
|
0.98248687 0.97723292 0.98423818 0.97723292]
|
|
|
|
mean value: 0.9807229544965742
|
|
|
|
key: test_fscore
|
|
value: [0.78787879 0.85294118 0.71875 0.83076923 0.73239437 0.7761194
|
|
0.76923077 0.76056338 0.76056338 0.78688525]
|
|
|
|
mean value: 0.7776095739996653
|
|
|
|
key: train_fscore
|
|
value: [0.98093588 0.98263889 0.98434783 0.98086957 0.97746967 0.98093588
|
|
0.98275862 0.97762478 0.98440208 0.97762478]
|
|
|
|
mean value: 0.9809607971456844
|
|
|
|
key: test_precision
|
|
value: [0.76470588 0.80555556 0.71875 0.81818182 0.66666667 0.74285714
|
|
0.75757576 0.675 0.675 0.8 ]
|
|
|
|
mean value: 0.7424292823189882
|
|
|
|
key: train_precision
|
|
value: [0.96917808 0.97250859 0.97586207 0.97241379 0.96575342 0.96917808
|
|
0.96610169 0.96271186 0.97594502 0.96271186]
|
|
|
|
mean value: 0.9692364483086298
|
|
|
|
key: test_recall
|
|
value: [0.8125 0.90625 0.71875 0.84375 0.8125 0.8125
|
|
0.78125 0.87096774 0.87096774 0.77419355]
|
|
|
|
mean value: 0.8203629032258064
|
|
|
|
key: train_recall
|
|
value: [0.99298246 0.99298246 0.99298246 0.98947368 0.98947368 0.99298246
|
|
1. 0.99300699 0.99300699 0.99300699]
|
|
|
|
mean value: 0.9929898172003435
|
|
|
|
key: test_roc_auc
|
|
value: [0.78125 0.84375 0.71875 0.828125 0.69657258 0.76108871
|
|
0.76159274 0.73235887 0.73235887 0.79334677]
|
|
|
|
mean value: 0.7649193548387097
|
|
|
|
key: train_roc_auc
|
|
value: [0.98070175 0.98245614 0.98421053 0.98070175 0.97725432 0.98075696
|
|
0.98251748 0.97720525 0.98422279 0.97720525]
|
|
|
|
mean value: 0.9807232241442767
|
|
|
|
key: test_jcc
|
|
value: [0.65 0.74358974 0.56097561 0.71052632 0.57777778 0.63414634
|
|
0.625 0.61363636 0.61363636 0.64864865]
|
|
|
|
mean value: 0.6377937164297883
|
|
|
|
key: train_jcc
|
|
value: [0.96258503 0.96587031 0.96917808 0.96245734 0.9559322 0.96258503
|
|
0.96610169 0.95622896 0.96928328 0.95622896]
|
|
|
|
mean value: 0.9626450882483695
|
|
|
|
MCC on Blind test: 0.35
|
|
|
|
Accuracy on Blind test: 0.68
|
|
|
|
Model_name: Gradient Boosting
|
|
Model func: GradientBoostingClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GradientBoostingClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.83188581 0.84436393 0.82656622 0.82991767 0.82706404 0.83091283
|
|
0.82206988 0.83148551 0.82596612 0.82123756]
|
|
|
|
mean value: 0.8291469573974609
|
|
|
|
key: score_time
|
|
value: [0.00976157 0.0102365 0.0104444 0.00963187 0.00991011 0.01015711
|
|
0.00961208 0.00998163 0.0098474 0.00953841]
|
|
|
|
mean value: 0.009912109375
|
|
|
|
key: test_mcc
|
|
value: [0.875 0.90669283 0.8125 0.84416229 0.87487431 0.81572458
|
|
0.71471774 0.88034084 0.72407013 0.96875 ]
|
|
|
|
mean value: 0.8416832721490438
|
|
|
|
key: train_mcc
|
|
value: [0.98952851 0.98947978 0.99649736 0.98596491 0.98601381 0.99301918
|
|
0.98949809 0.98949809 0.99299472 0.97898417]
|
|
|
|
mean value: 0.9891478630296477
|
|
|
|
key: test_accuracy
|
|
value: [0.9375 0.953125 0.90625 0.921875 0.93650794 0.9047619
|
|
0.85714286 0.93650794 0.85714286 0.98412698]
|
|
|
|
mean value: 0.9194940476190476
|
|
|
|
key: train_accuracy
|
|
value: [0.99473684 0.99473684 0.99824561 0.99298246 0.99299475 0.99649737
|
|
0.99474606 0.99474606 0.99649737 0.98949212]
|
|
|
|
mean value: 0.9945675484683688
|
|
|
|
key: test_fscore
|
|
value: [0.9375 0.95384615 0.90625 0.92063492 0.93548387 0.91176471
|
|
0.85714286 0.93939394 0.86567164 0.98412698]
|
|
|
|
mean value: 0.9211815073785995
|
|
|
|
key: train_fscore
|
|
value: [0.9947644 0.99474606 0.99824253 0.99298246 0.99300699 0.9965035
|
|
0.99472759 0.9947644 0.9965035 0.98951049]
|
|
|
|
mean value: 0.9945751910043851
|
|
|
|
key: test_precision
|
|
value: [0.9375 0.93939394 0.90625 0.93548387 0.96666667 0.86111111
|
|
0.87096774 0.88571429 0.80555556 0.96875 ]
|
|
|
|
mean value: 0.9077393171344784
|
|
|
|
key: train_precision
|
|
value: [0.98958333 0.99300699 1. 0.99298246 0.98954704 0.99303136
|
|
0.99647887 0.99303136 0.9965035 0.98951049]
|
|
|
|
mean value: 0.993367539783166
|
|
|
|
key: test_recall
|
|
value: [0.9375 0.96875 0.90625 0.90625 0.90625 0.96875
|
|
0.84375 1. 0.93548387 1. ]
|
|
|
|
mean value: 0.9372983870967742
|
|
|
|
key: train_recall
|
|
value: [1. 0.99649123 0.99649123 0.99298246 0.99649123 1.
|
|
0.99298246 0.9965035 0.9965035 0.98951049]
|
|
|
|
mean value: 0.9957956079008711
|
|
|
|
key: test_roc_auc
|
|
value: [0.9375 0.953125 0.90625 0.921875 0.93699597 0.90372984
|
|
0.85735887 0.9375 0.85836694 0.984375 ]
|
|
|
|
mean value: 0.9197076612903226
|
|
|
|
key: train_roc_auc
|
|
value: [0.99473684 0.99473684 0.99824561 0.99298246 0.99300086 0.9965035
|
|
0.99474298 0.99474298 0.99649736 0.98949209]
|
|
|
|
mean value: 0.9945681511470985
|
|
|
|
key: test_jcc
|
|
value: [0.88235294 0.91176471 0.82857143 0.85294118 0.87878788 0.83783784
|
|
0.75 0.88571429 0.76315789 0.96875 ]
|
|
|
|
mean value: 0.8559878149177684
|
|
|
|
key: train_jcc
|
|
value: [0.98958333 0.98954704 0.99649123 0.98606272 0.98611111 0.99303136
|
|
0.98951049 0.98958333 0.99303136 0.97923875]
|
|
|
|
mean value: 0.9892190723551298
|
|
|
|
MCC on Blind test: 0.7
|
|
|
|
Accuracy on Blind test: 0.86
|
|
|
|
Model_name: QDA
|
|
Model func: QuadraticDiscriminantAnalysis()
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', QuadraticDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.03697062 0.0324316 0.0339098 0.03319502 0.0436852 0.05119801
|
|
0.05784035 0.06083322 0.03914094 0.03315282]
|
|
|
|
mean value: 0.04223575592041016
|
|
|
|
key: score_time
|
|
value: [0.01292467 0.01329732 0.01285863 0.01501918 0.026052 0.02982116
|
|
0.0223608 0.01836944 0.01538515 0.01521349]
|
|
|
|
mean value: 0.018130183219909668
|
|
|
|
key: test_mcc
|
|
value: [0.1242473 0.32163376 0.25819889 0.28347335 0.13433882 0.15715464
|
|
0.43960456 0.22008521 0.31933319 0.34495882]
|
|
|
|
mean value: 0.2603028546854528
|
|
|
|
key: train_mcc
|
|
value: [0.34935261 0.32679675 0.33007486 0.33333333 0.34206181 0.3515425
|
|
0.32590867 0.33683398 0.33033226 0.38353707]
|
|
|
|
mean value: 0.340977383652551
|
|
|
|
key: test_accuracy
|
|
value: [0.546875 0.59375 0.59375 0.59375 0.53968254 0.55555556
|
|
0.68253968 0.53968254 0.58730159 0.63492063]
|
|
|
|
mean value: 0.5867807539682539
|
|
|
|
key: train_accuracy
|
|
value: [0.60877193 0.59649123 0.59824561 0.6 0.60420315 0.60945709
|
|
0.59544658 0.60245184 0.59894921 0.62872154]
|
|
|
|
mean value: 0.6042738193996374
|
|
|
|
key: test_fscore
|
|
value: [0.65882353 0.71111111 0.69767442 0.70454545 0.68131868 0.68181818
|
|
0.75609756 0.68131868 0.70454545 0.71604938]
|
|
|
|
mean value: 0.699330245636564
|
|
|
|
key: train_fscore
|
|
value: [0.71878941 0.7125 0.71339174 0.71428571 0.7160804 0.71878941
|
|
0.71161049 0.71589487 0.71410737 0.72959184]
|
|
|
|
mean value: 0.7165041228602924
|
|
|
|
key: test_precision
|
|
value: [0.52830189 0.55172414 0.55555556 0.55357143 0.52542373 0.53571429
|
|
0.62 0.51666667 0.54385965 0.58 ]
|
|
|
|
mean value: 0.5510817339167791
|
|
|
|
key: train_precision
|
|
value: [0.56102362 0.55339806 0.55447471 0.55555556 0.55772994 0.56102362
|
|
0.55232558 0.55750487 0.55533981 0.57429719]
|
|
|
|
mean value: 0.5582672956635221
|
|
|
|
key: test_recall
|
|
value: [0.875 1. 0.9375 0.96875 0.96875 0.9375
|
|
0.96875 1. 1. 0.93548387]
|
|
|
|
mean value: 0.9591733870967742
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.546875 0.59375 0.59375 0.59375 0.5327621 0.54939516
|
|
0.67792339 0.546875 0.59375 0.63961694]
|
|
|
|
mean value: 0.5868447580645162
|
|
|
|
key: train_roc_auc
|
|
value: [0.60877193 0.59649123 0.59824561 0.6 0.6048951 0.61013986
|
|
0.59615385 0.60175439 0.59824561 0.62807018]
|
|
|
|
mean value: 0.6042767758557233
|
|
|
|
key: test_jcc
|
|
value: [0.49122807 0.55172414 0.53571429 0.54385965 0.51666667 0.51724138
|
|
0.60784314 0.51666667 0.54385965 0.55769231]
|
|
|
|
mean value: 0.5382495949657261
|
|
|
|
key: train_jcc
|
|
value: [0.56102362 0.55339806 0.55447471 0.55555556 0.55772994 0.56102362
|
|
0.55232558 0.55750487 0.55533981 0.57429719]
|
|
|
|
mean value: 0.5582672956635221
|
|
|
|
MCC on Blind test: 0.04
|
|
|
|
Accuracy on Blind test: 0.46
|
|
|
|
Model_name: Ridge Classifier
|
|
Model func: RidgeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.03842688 0.04042244 0.04043031 0.04043436 0.04095244 0.04054451
|
|
0.04072213 0.04055262 0.04073429 0.04077888]
|
|
|
|
mean value: 0.0403998851776123
|
|
|
|
key: score_time
|
|
value: [0.02156138 0.01931214 0.019135 0.01919627 0.01927257 0.01907492
|
|
0.01910901 0.01933813 0.01922321 0.01907396]
|
|
|
|
mean value: 0.019429659843444823
|
|
|
|
key: test_mcc
|
|
value: [0.59404013 0.69991324 0.50395263 0.69293487 0.56449867 0.62325024
|
|
0.65315611 0.57596915 0.63159952 0.68740835]
|
|
|
|
mean value: 0.6226722903584234
|
|
|
|
key: train_mcc
|
|
value: [0.79105298 0.77330677 0.77330677 0.78033683 0.79734868 0.78775886
|
|
0.78408981 0.78376086 0.78740717 0.7781152 ]
|
|
|
|
mean value: 0.7836483929794602
|
|
|
|
key: test_accuracy
|
|
value: [0.796875 0.84375 0.75 0.84375 0.77777778 0.80952381
|
|
0.82539683 0.77777778 0.80952381 0.84126984]
|
|
|
|
mean value: 0.8075644841269841
|
|
|
|
key: train_accuracy
|
|
value: [0.89473684 0.88596491 0.88596491 0.88947368 0.89842382 0.89316988
|
|
0.89141856 0.89141856 0.89316988 0.88791594]
|
|
|
|
mean value: 0.8911656988355301
|
|
|
|
key: test_fscore
|
|
value: [0.79365079 0.85714286 0.73333333 0.83333333 0.8 0.82352941
|
|
0.8358209 0.8 0.82352941 0.82758621]
|
|
|
|
mean value: 0.812792624340867
|
|
|
|
key: train_fscore
|
|
value: [0.89795918 0.88926746 0.88926746 0.89267462 0.9 0.89608177
|
|
0.89419795 0.89419795 0.89608177 0.89225589]
|
|
|
|
mean value: 0.8941984063841519
|
|
|
|
key: test_precision
|
|
value: [0.80645161 0.78947368 0.78571429 0.89285714 0.73684211 0.77777778
|
|
0.8 0.71794872 0.75675676 0.88888889]
|
|
|
|
mean value: 0.795271097232048
|
|
|
|
key: train_precision
|
|
value: [0.87128713 0.86423841 0.86423841 0.86754967 0.88474576 0.87086093
|
|
0.87043189 0.87333333 0.87375415 0.86038961]
|
|
|
|
mean value: 0.870082929887785
|
|
|
|
key: test_recall
|
|
value: [0.78125 0.9375 0.6875 0.78125 0.875 0.875
|
|
0.875 0.90322581 0.90322581 0.77419355]
|
|
|
|
mean value: 0.8393145161290323
|
|
|
|
key: train_recall
|
|
value: [0.92631579 0.91578947 0.91578947 0.91929825 0.91578947 0.92280702
|
|
0.91929825 0.91608392 0.91958042 0.92657343]
|
|
|
|
mean value: 0.9197325481536007
|
|
|
|
key: test_roc_auc
|
|
value: [0.796875 0.84375 0.75 0.84375 0.77620968 0.80846774
|
|
0.82459677 0.7797379 0.8109879 0.84022177]
|
|
|
|
mean value: 0.8074596774193549
|
|
|
|
key: train_roc_auc
|
|
value: [0.89473684 0.88596491 0.88596491 0.88947368 0.89845418 0.89322169
|
|
0.8914673 0.89137529 0.89312354 0.88784812]
|
|
|
|
mean value: 0.8911630474788369
|
|
|
|
key: test_jcc
|
|
value: [0.65789474 0.75 0.57894737 0.71428571 0.66666667 0.7
|
|
0.71794872 0.66666667 0.7 0.70588235]
|
|
|
|
mean value: 0.68582922237721
|
|
|
|
key: train_jcc
|
|
value: [0.81481481 0.8006135 0.8006135 0.80615385 0.81818182 0.8117284
|
|
0.80864198 0.80864198 0.8117284 0.80547112]
|
|
|
|
mean value: 0.8086589338376311
|
|
|
|
MCC on Blind test: 0.45
|
|
|
|
Accuracy on Blind test: 0.73
|
|
|
|
Model_name: Ridge ClassifierCV
|
|
Model func: RidgeClassifierCV(cv=10)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifierCV(cv=10))])
|
|
|
|
key: fit_time
|
|
value: [0.29730058 0.30320644 0.42007184 0.30402303 0.30868077 0.32410741
|
|
0.30308056 0.30208421 0.33901119 0.33116007]
|
|
|
|
mean value: 0.3232726097106934
|
|
|
|
key: score_time
|
|
value: [0.01932836 0.01916742 0.01938939 0.01922774 0.01938105 0.02234888
|
|
0.01916742 0.01919007 0.01934958 0.01917791]
|
|
|
|
mean value: 0.01957278251647949
|
|
|
|
key: test_mcc
|
|
value: [0.5625 0.67253825 0.50395263 0.69293487 0.6385282 0.62325024
|
|
0.68352185 0.57596915 0.63159952 0.68740835]
|
|
|
|
mean value: 0.6272203060525585
|
|
|
|
key: train_mcc
|
|
value: [0.81295203 0.7971982 0.77330677 0.78033683 0.82587654 0.78775886
|
|
0.80483721 0.78376086 0.78740717 0.7781152 ]
|
|
|
|
mean value: 0.7931549669716107
|
|
|
|
key: test_accuracy
|
|
value: [0.78125 0.828125 0.75 0.84375 0.80952381 0.80952381
|
|
0.84126984 0.77777778 0.80952381 0.84126984]
|
|
|
|
mean value: 0.8092013888888889
|
|
|
|
key: train_accuracy
|
|
value: [0.90526316 0.89824561 0.88596491 0.88947368 0.91243433 0.89316988
|
|
0.90192644 0.89141856 0.89316988 0.88791594]
|
|
|
|
mean value: 0.8958982394690755
|
|
|
|
key: test_fscore
|
|
value: [0.78125 0.84507042 0.73333333 0.83333333 0.83333333 0.82352941
|
|
0.84848485 0.8 0.82352941 0.82758621]
|
|
|
|
mean value: 0.8149450301446024
|
|
|
|
key: train_fscore
|
|
value: /home/tanu/git/LSHTM_analysis/scripts/ml/./katg_cd_7030.py:156: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
ros_CT.sort_values(by = ['test_mcc'], ascending = False, inplace = True)
|
|
/home/tanu/git/LSHTM_analysis/scripts/ml/./katg_cd_7030.py:159: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
ros_BT.sort_values(by = ['bts_mcc'], ascending = False, inplace = True)
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[0.90878378 0.90034364 0.88926746 0.89267462 0.91438356 0.89608177
|
|
0.90410959 0.89419795 0.89608177 0.89225589]
|
|
|
|
mean value: 0.8988180043360514
|
|
|
|
key: test_precision
|
|
value: [0.78125 0.76923077 0.78571429 0.89285714 0.75 0.77777778
|
|
0.82352941 0.71794872 0.75675676 0.88888889]
|
|
|
|
mean value: 0.7943953750939046
|
|
|
|
key: train_precision
|
|
value: [0.8762215 0.88215488 0.86423841 0.86754967 0.89297659 0.87086093
|
|
0.88294314 0.87333333 0.87375415 0.86038961]
|
|
|
|
mean value: 0.874442221613707
|
|
|
|
key: test_recall
|
|
value: [0.78125 0.9375 0.6875 0.78125 0.9375 0.875
|
|
0.875 0.90322581 0.90322581 0.77419355]
|
|
|
|
mean value: 0.8455645161290323
|
|
|
|
key: train_recall
|
|
value: [0.94385965 0.91929825 0.91578947 0.91929825 0.93684211 0.92280702
|
|
0.92631579 0.91608392 0.91958042 0.92657343]
|
|
|
|
mean value: 0.9246448288553551
|
|
|
|
key: test_roc_auc
|
|
value: [0.78125 0.828125 0.75 0.84375 0.80745968 0.80846774
|
|
0.84072581 0.7797379 0.8109879 0.84022177]
|
|
|
|
mean value: 0.8090725806451613
|
|
|
|
key: train_roc_auc
|
|
value: [0.90526316 0.89824561 0.88596491 0.88947368 0.912477 0.89322169
|
|
0.90196908 0.89137529 0.89312354 0.88784812]
|
|
|
|
mean value: 0.8958962090541038
|
|
|
|
key: test_jcc
|
|
value: [0.64102564 0.73170732 0.57894737 0.71428571 0.71428571 0.7
|
|
0.73684211 0.66666667 0.7 0.70588235]
|
|
|
|
mean value: 0.6889642879962294
|
|
|
|
key: train_jcc
|
|
value: [0.83281734 0.81875 0.8006135 0.80615385 0.84227129 0.8117284
|
|
0.825 0.80864198 0.8117284 0.80547112]
|
|
|
|
mean value: 0.8163175863975216
|
|
|
|
MCC on Blind test: 0.45
|
|
|
|
Accuracy on Blind test: 0.73
|
|
|
|
Model_name: Logistic Regression
|
|
Model func: LogisticRegression(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegression(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.04284215 0.03778028 0.03733468 0.04465842 0.0438664 0.04482412
|
|
0.04302669 0.03571248 0.03619766 0.03719878]
|
|
|
|
mean value: 0.04034416675567627
|
|
|
|
key: score_time
|
|
value: [0.01585555 0.01863289 0.01600218 0.01202321 0.01203775 0.0120523
|
|
0.01193547 0.01202154 0.01468682 0.01500201]
|
|
|
|
mean value: 0.014024972915649414
|
|
|
|
key: test_mcc
|
|
value: [0.48566186 0.41736501 0.52223297 0.43519414 0.56521739 0.52623481
|
|
0.70164642 0.57396402 0.6092718 0.48566186]
|
|
|
|
mean value: 0.5322450278028839
|
|
|
|
key: train_mcc
|
|
value: [0.71565259 0.71638999 0.70571764 0.72484917 0.715385 0.69663647
|
|
0.72532357 0.68734715 0.7059816 0.70061391]
|
|
|
|
mean value: 0.7093897093196634
|
|
|
|
key: test_accuracy
|
|
value: [0.73913043 0.69565217 0.76086957 0.7173913 0.7826087 0.76086957
|
|
0.84782609 0.7826087 0.80434783 0.73913043]
|
|
|
|
mean value: 0.7630434782608696
|
|
|
|
key: train_accuracy
|
|
value: [0.85748792 0.85748792 0.852657 0.86231884 0.85748792 0.84782609
|
|
0.86231884 0.84299517 0.852657 0.85024155]
|
|
|
|
mean value: 0.8543478260869566
|
|
|
|
key: test_fscore
|
|
value: [0.76 0.74074074 0.75555556 0.71111111 0.7826087 0.74418605
|
|
0.85714286 0.8 0.8 0.71428571]
|
|
|
|
mean value: 0.7665630720999781
|
|
|
|
key: train_fscore
|
|
value: [0.86052009 0.8618267 0.85510689 0.86396181 0.85985748 0.85176471
|
|
0.86524823 0.84777518 0.85579196 0.85167464]
|
|
|
|
mean value: 0.8573527688643722
|
|
|
|
key: test_precision
|
|
value: [0.7037037 0.64516129 0.77272727 0.72727273 0.7826087 0.8
|
|
0.80769231 0.74074074 0.81818182 0.78947368]
|
|
|
|
mean value: 0.7587562240503851
|
|
|
|
key: train_precision
|
|
value: [0.84259259 0.83636364 0.8411215 0.85377358 0.84579439 0.83027523
|
|
0.84722222 0.82272727 0.83796296 0.8436019 ]
|
|
|
|
mean value: 0.840143528471721
|
|
|
|
key: test_recall
|
|
value: [0.82608696 0.86956522 0.73913043 0.69565217 0.7826087 0.69565217
|
|
0.91304348 0.86956522 0.7826087 0.65217391]
|
|
|
|
mean value: 0.782608695652174
|
|
|
|
key: train_recall
|
|
value: [0.87922705 0.88888889 0.86956522 0.87439614 0.87439614 0.87439614
|
|
0.88405797 0.87439614 0.87439614 0.85990338]
|
|
|
|
mean value: 0.8753623188405797
|
|
|
|
key: test_roc_auc
|
|
value: [0.73913043 0.69565217 0.76086957 0.7173913 0.7826087 0.76086957
|
|
0.84782609 0.7826087 0.80434783 0.73913043]
|
|
|
|
mean value: 0.7630434782608695
|
|
|
|
key: train_roc_auc
|
|
value: [0.85748792 0.85748792 0.852657 0.86231884 0.85748792 0.84782609
|
|
0.86231884 0.84299517 0.852657 0.85024155]
|
|
|
|
mean value: 0.8543478260869565
|
|
|
|
key: test_jcc
|
|
value: [0.61290323 0.58823529 0.60714286 0.55172414 0.64285714 0.59259259
|
|
0.75 0.66666667 0.66666667 0.55555556]
|
|
|
|
mean value: 0.6234344139336615
|
|
|
|
key: train_jcc
|
|
value: [0.75518672 0.75720165 0.74688797 0.7605042 0.75416667 0.74180328
|
|
0.7625 0.73577236 0.74793388 0.74166667]
|
|
|
|
mean value: 0.7503623390610843
|
|
|
|
MCC on Blind test: 0.43
|
|
|
|
Accuracy on Blind test: 0.72
|
|
|
|
Model_name: Logistic RegressionCV
|
|
Model func: LogisticRegressionCV(random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegressionCV(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.14505887 0.8046906 0.78343368 1.02523613 0.84160423 0.96089244
|
|
0.86398363 0.91612816 0.8715713 0.87083769]
|
|
|
|
mean value: 0.9083436727523804
|
|
|
|
key: score_time
|
|
value: [0.0148313 0.0152638 0.01667786 0.01521349 0.01547384 0.01542234
|
|
0.01566553 0.01552701 0.01535368 0.01540351]
|
|
|
|
mean value: 0.015483236312866211
|
|
|
|
key: test_mcc
|
|
value: [0.65465367 0.36514837 0.52223297 0.56521739 0.65465367 0.47826087
|
|
0.61394061 0.47245559 0.6092718 0.57396402]
|
|
|
|
mean value: 0.5509798963838973
|
|
|
|
key: train_mcc
|
|
value: [0.89376152 0.85025147 0.86538081 0.77792303 0.83116038 0.88422307
|
|
0.84556851 0.90822316 0.84556851 0.72977523]
|
|
|
|
mean value: 0.8431835698524185
|
|
|
|
key: test_accuracy
|
|
value: [0.82608696 0.67391304 0.76086957 0.7826087 0.82608696 0.73913043
|
|
0.80434783 0.7173913 0.80434783 0.7826087 ]
|
|
|
|
mean value: 0.7717391304347826
|
|
|
|
key: train_accuracy
|
|
value: [0.9468599 0.92512077 0.93236715 0.88888889 0.91545894 0.94202899
|
|
0.92270531 0.95410628 0.92270531 0.8647343 ]
|
|
|
|
mean value: 0.9214975845410628
|
|
|
|
key: test_fscore
|
|
value: [0.83333333 0.71698113 0.75555556 0.7826087 0.81818182 0.73913043
|
|
0.81632653 0.76363636 0.8 0.76190476]
|
|
|
|
mean value: 0.7787658625734332
|
|
|
|
key: train_fscore
|
|
value: [0.94711538 0.92493947 0.93364929 0.88995215 0.91646778 0.94258373
|
|
0.92344498 0.95399516 0.92344498 0.86666667]
|
|
|
|
mean value: 0.9222259582829083
|
|
|
|
key: test_precision
|
|
value: [0.8 0.63333333 0.77272727 0.7826087 0.85714286 0.73913043
|
|
0.76923077 0.65625 0.81818182 0.84210526]
|
|
|
|
mean value: 0.7670710444208728
|
|
|
|
key: train_precision
|
|
value: [0.94258373 0.92718447 0.91627907 0.88151659 0.90566038 0.93364929
|
|
0.91469194 0.95631068 0.91469194 0.85446009]
|
|
|
|
mean value: 0.9147028181744306
|
|
|
|
key: test_recall
|
|
value: [0.86956522 0.82608696 0.73913043 0.7826087 0.7826087 0.73913043
|
|
0.86956522 0.91304348 0.7826087 0.69565217]
|
|
|
|
mean value: 0.8
|
|
|
|
key: train_recall
|
|
value: [0.95169082 0.92270531 0.95169082 0.89855072 0.92753623 0.95169082
|
|
0.93236715 0.95169082 0.93236715 0.87922705]
|
|
|
|
mean value: 0.9299516908212561
|
|
|
|
key: test_roc_auc
|
|
value: [0.82608696 0.67391304 0.76086957 0.7826087 0.82608696 0.73913043
|
|
0.80434783 0.7173913 0.80434783 0.7826087 ]
|
|
|
|
mean value: 0.7717391304347826
|
|
|
|
key: train_roc_auc
|
|
value: [0.9468599 0.92512077 0.93236715 0.88888889 0.91545894 0.94202899
|
|
0.92270531 0.95410628 0.92270531 0.8647343 ]
|
|
|
|
mean value: 0.9214975845410629
|
|
|
|
key: test_jcc
|
|
value: [0.71428571 0.55882353 0.60714286 0.64285714 0.69230769 0.5862069
|
|
0.68965517 0.61764706 0.66666667 0.61538462]
|
|
|
|
mean value: 0.63909773458455
|
|
|
|
key: train_jcc
|
|
value: [0.89954338 0.86036036 0.87555556 0.80172414 0.84581498 0.89140271
|
|
0.85777778 0.91203704 0.85777778 0.76470588]
|
|
|
|
mean value: 0.8566699600693612
|
|
|
|
MCC on Blind test: 0.44
|
|
|
|
Accuracy on Blind test: 0.72
|
|
|
|
Model_name: Gaussian NB
|
|
Model func: GaussianNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01417565 0.01006126 0.00992203 0.00977826 0.00981617 0.00975299
|
|
0.00976253 0.00992513 0.00973701 0.00970101]
|
|
|
|
mean value: 0.010263204574584961
|
|
|
|
key: score_time
|
|
value: [0.01206207 0.00936532 0.00897574 0.00889325 0.00894952 0.00891542
|
|
0.00892544 0.00896716 0.00890231 0.00891733]
|
|
|
|
mean value: 0.009287357330322266
|
|
|
|
key: test_mcc
|
|
value: [0.35082321 0.30550505 0.30905755 0.52223297 0.36514837 0.48007936
|
|
0.52223297 0.4454354 0.45643546 0.39735971]
|
|
|
|
mean value: 0.4154310044853051
|
|
|
|
key: train_mcc
|
|
value: [0.44211758 0.42423178 0.44989455 0.4784619 0.44369133 0.48007936
|
|
0.4517191 0.442711 0.4517191 0.45745478]
|
|
|
|
mean value: 0.45220804789426783
|
|
|
|
key: test_accuracy
|
|
value: [0.67391304 0.65217391 0.65217391 0.76086957 0.67391304 0.73913043
|
|
0.76086957 0.7173913 0.7173913 0.69565217]
|
|
|
|
mean value: 0.7043478260869566
|
|
|
|
key: train_accuracy
|
|
value: [0.71980676 0.70531401 0.71980676 0.73913043 0.71980676 0.73913043
|
|
0.72222222 0.7173913 0.72222222 0.72705314]
|
|
|
|
mean value: 0.7231884057971014
|
|
|
|
key: test_fscore
|
|
value: [0.65116279 0.63636364 0.61904762 0.76595745 0.61538462 0.75
|
|
0.75555556 0.68292683 0.66666667 0.66666667]
|
|
|
|
mean value: 0.6809731826459238
|
|
|
|
key: train_fscore
|
|
value: [0.70408163 0.66298343 0.68648649 0.74285714 0.69948187 0.75
|
|
0.69496021 0.688 0.69496021 0.70951157]
|
|
|
|
mean value: 0.7033322545222606
|
|
|
|
key: test_precision
|
|
value: [0.7 0.66666667 0.68421053 0.75 0.75 0.72
|
|
0.77272727 0.77777778 0.8125 0.73684211]
|
|
|
|
mean value: 0.7370724348750665
|
|
|
|
key: train_precision
|
|
value: [0.74594595 0.77419355 0.7791411 0.73239437 0.75418994 0.72
|
|
0.77058824 0.76785714 0.77058824 0.75824176]
|
|
|
|
mean value: 0.7573140280645919
|
|
|
|
key: test_recall
|
|
value: [0.60869565 0.60869565 0.56521739 0.7826087 0.52173913 0.7826087
|
|
0.73913043 0.60869565 0.56521739 0.60869565]
|
|
|
|
mean value: 0.6391304347826087
|
|
|
|
key: train_recall
|
|
value: [0.66666667 0.57971014 0.61352657 0.75362319 0.65217391 0.7826087
|
|
0.63285024 0.62318841 0.63285024 0.66666667]
|
|
|
|
mean value: 0.6603864734299517
|
|
|
|
key: test_roc_auc
|
|
value: [0.67391304 0.65217391 0.65217391 0.76086957 0.67391304 0.73913043
|
|
0.76086957 0.7173913 0.7173913 0.69565217]
|
|
|
|
mean value: 0.7043478260869565
|
|
|
|
key: train_roc_auc
|
|
value: [0.71980676 0.70531401 0.71980676 0.73913043 0.71980676 0.73913043
|
|
0.72222222 0.7173913 0.72222222 0.72705314]
|
|
|
|
mean value: 0.7231884057971014
|
|
|
|
key: test_jcc
|
|
value: [0.48275862 0.46666667 0.44827586 0.62068966 0.44444444 0.6
|
|
0.60714286 0.51851852 0.5 0.5 ]
|
|
|
|
mean value: 0.5188496624703521
|
|
|
|
key: train_jcc
|
|
value: [0.54330709 0.49586777 0.52263374 0.59090909 0.53784861 0.6
|
|
0.53252033 0.52439024 0.53252033 0.5498008 ]
|
|
|
|
mean value: 0.5429797987673654
|
|
|
|
MCC on Blind test: 0.35
|
|
|
|
Accuracy on Blind test: 0.69
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01003051 0.01000547 0.01006913 0.00997734 0.01001358 0.01002645
|
|
0.01001191 0.00994897 0.00999188 0.00996375]
|
|
|
|
mean value: 0.010003900527954102
|
|
|
|
key: score_time
|
|
value: [0.00892138 0.00891447 0.00889754 0.00898504 0.00888562 0.008883
|
|
0.00893068 0.00893736 0.00896692 0.00892067]
|
|
|
|
mean value: 0.008924269676208496
|
|
|
|
key: test_mcc
|
|
value: [0.26311741 0.26111648 0.30434783 0.30905755 0.34815531 0.4454354
|
|
0.48566186 0.41736501 0.47826087 0.43519414]
|
|
|
|
mean value: 0.37477118607890214
|
|
|
|
key: train_mcc
|
|
value: [0.4977937 0.54289671 0.52179393 0.51693234 0.51729468 0.49582377
|
|
0.4882875 0.47503462 0.49992343 0.47370088]
|
|
|
|
mean value: 0.5029481566309334
|
|
|
|
key: test_accuracy
|
|
value: [0.63043478 0.63043478 0.65217391 0.65217391 0.67391304 0.7173913
|
|
0.73913043 0.69565217 0.73913043 0.7173913 ]
|
|
|
|
mean value: 0.6847826086956521
|
|
|
|
key: train_accuracy
|
|
value: [0.74879227 0.7705314 0.76086957 0.75845411 0.75845411 0.74637681
|
|
0.74396135 0.73671498 0.74879227 0.73671498]
|
|
|
|
mean value: 0.7509661835748792
|
|
|
|
key: test_fscore
|
|
value: [0.65306122 0.63829787 0.65217391 0.61904762 0.66666667 0.74509804
|
|
0.76 0.74074074 0.73913043 0.71111111]
|
|
|
|
mean value: 0.6925327621438132
|
|
|
|
key: train_fscore
|
|
value: [0.75238095 0.77958237 0.76258993 0.75728155 0.76303318 0.7597254
|
|
0.74881517 0.74709977 0.76036866 0.73218673]
|
|
|
|
mean value: 0.7563063705878426
|
|
|
|
key: test_precision
|
|
value: [0.61538462 0.625 0.65217391 0.68421053 0.68181818 0.67857143
|
|
0.7037037 0.64516129 0.73913043 0.72727273]
|
|
|
|
mean value: 0.6752426821215114
|
|
|
|
key: train_precision
|
|
value: [0.74178404 0.75 0.75714286 0.76097561 0.74883721 0.72173913
|
|
0.73488372 0.71875 0.72687225 0.745 ]
|
|
|
|
mean value: 0.7405984811821016
|
|
|
|
key: test_recall
|
|
value: [0.69565217 0.65217391 0.65217391 0.56521739 0.65217391 0.82608696
|
|
0.82608696 0.86956522 0.73913043 0.69565217]
|
|
|
|
mean value: 0.717391304347826
|
|
|
|
key: train_recall
|
|
value: [0.76328502 0.8115942 0.76811594 0.75362319 0.77777778 0.80193237
|
|
0.76328502 0.77777778 0.79710145 0.71980676]
|
|
|
|
mean value: 0.7734299516908213
|
|
|
|
key: test_roc_auc
|
|
value: [0.63043478 0.63043478 0.65217391 0.65217391 0.67391304 0.7173913
|
|
0.73913043 0.69565217 0.73913043 0.7173913 ]
|
|
|
|
mean value: 0.6847826086956521
|
|
|
|
key: train_roc_auc
|
|
value: [0.74879227 0.7705314 0.76086957 0.75845411 0.75845411 0.74637681
|
|
0.74396135 0.73671498 0.74879227 0.73671498]
|
|
|
|
mean value: 0.7509661835748792
|
|
|
|
key: test_jcc
|
|
value: [0.48484848 0.46875 0.48387097 0.44827586 0.5 0.59375
|
|
0.61290323 0.58823529 0.5862069 0.55172414]
|
|
|
|
mean value: 0.5318564869066243
|
|
|
|
key: train_jcc
|
|
value: [0.60305344 0.63878327 0.61627907 0.609375 0.61685824 0.61254613
|
|
0.59848485 0.5962963 0.6133829 0.57751938]
|
|
|
|
mean value: 0.6082578562107429
|
|
|
|
MCC on Blind test: 0.35
|
|
|
|
Accuracy on Blind test: 0.68
|
|
|
|
Model_name: K-Nearest Neighbors
|
|
Model func: KNeighborsClassifier()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', KNeighborsClassifier())])
|
|
|
|
key: fit_time
|
|
value: [0.00986171 0.00919127 0.01064014 0.00955677 0.01044106 0.01053977
|
|
0.01064229 0.01063395 0.01053476 0.01057172]
|
|
|
|
mean value: 0.010261344909667968
|
|
|
|
key: score_time
|
|
value: [0.01187658 0.01271725 0.01210403 0.01190686 0.01244688 0.01247811
|
|
0.0124712 0.01251221 0.01339221 0.01789713]
|
|
|
|
mean value: 0.012980246543884277
|
|
|
|
key: test_mcc
|
|
value: [0.21821789 0. 0.22075539 0.34815531 0.22518867 0.34815531
|
|
0.1351132 0.04415108 0.08908708 0.35082321]
|
|
|
|
mean value: 0.1979647153120915
|
|
|
|
key: train_mcc
|
|
value: [0.58588257 0.59520387 0.53645729 0.52777851 0.52444489 0.51751244
|
|
0.55578908 0.57512087 0.57085489 0.52857222]
|
|
|
|
mean value: 0.5517616648647189
|
|
|
|
key: test_accuracy
|
|
value: [0.60869565 0.5 0.60869565 0.67391304 0.60869565 0.67391304
|
|
0.56521739 0.52173913 0.54347826 0.67391304]
|
|
|
|
mean value: 0.5978260869565217
|
|
|
|
key: train_accuracy
|
|
value: [0.79227053 0.79710145 0.76811594 0.76328502 0.76086957 0.75845411
|
|
0.77777778 0.78743961 0.78502415 0.76328502]
|
|
|
|
mean value: 0.7753623188405797
|
|
|
|
key: test_fscore
|
|
value: [0.625 0.54901961 0.57142857 0.66666667 0.65384615 0.66666667
|
|
0.61538462 0.56 0.48780488 0.69387755]
|
|
|
|
mean value: 0.6089694710905
|
|
|
|
key: train_fscore
|
|
value: [0.79906542 0.8028169 0.77142857 0.77102804 0.77241379 0.76415094
|
|
0.78095238 0.79047619 0.79058824 0.77314815]
|
|
|
|
mean value: 0.7816068622151459
|
|
|
|
key: test_precision
|
|
value: [0.6 0.5 0.63157895 0.68181818 0.5862069 0.68181818
|
|
0.55172414 0.51851852 0.55555556 0.65384615]
|
|
|
|
mean value: 0.5961066573407771
|
|
|
|
key: train_precision
|
|
value: [0.77375566 0.78082192 0.76056338 0.74660633 0.73684211 0.74654378
|
|
0.76995305 0.77934272 0.7706422 0.74222222]
|
|
|
|
mean value: 0.7607293371810109
|
|
|
|
key: test_recall
|
|
value: [0.65217391 0.60869565 0.52173913 0.65217391 0.73913043 0.65217391
|
|
0.69565217 0.60869565 0.43478261 0.73913043]
|
|
|
|
mean value: 0.6304347826086957
|
|
|
|
key: train_recall
|
|
value: [0.82608696 0.82608696 0.7826087 0.79710145 0.8115942 0.7826087
|
|
0.79227053 0.80193237 0.8115942 0.80676329]
|
|
|
|
mean value: 0.8038647342995169
|
|
|
|
key: test_roc_auc
|
|
value: [0.60869565 0.5 0.60869565 0.67391304 0.60869565 0.67391304
|
|
0.56521739 0.52173913 0.54347826 0.67391304]
|
|
|
|
mean value: 0.5978260869565217
|
|
|
|
key: train_roc_auc
|
|
value: [0.79227053 0.79710145 0.76811594 0.76328502 0.76086957 0.75845411
|
|
0.77777778 0.78743961 0.78502415 0.76328502]
|
|
|
|
mean value: 0.7753623188405797
|
|
|
|
key: test_jcc
|
|
value: [0.45454545 0.37837838 0.4 0.5 0.48571429 0.5
|
|
0.44444444 0.38888889 0.32258065 0.53125 ]
|
|
|
|
mean value: 0.4405802097132742
|
|
|
|
key: train_jcc
|
|
value: [0.66536965 0.67058824 0.62790698 0.62737643 0.62921348 0.61832061
|
|
0.640625 0.65354331 0.6536965 0.63018868]
|
|
|
|
mean value: 0.6416828865918727
|
|
|
|
MCC on Blind test: 0.27
|
|
|
|
Accuracy on Blind test: 0.64
|
|
|
|
Model_name: SVM
|
|
Model func: SVC(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SVC(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02020502 0.01990628 0.02276731 0.02351856 0.02368045 0.02389264
|
|
0.02385902 0.02400041 0.02121592 0.02006316]
|
|
|
|
mean value: 0.022310876846313478
|
|
|
|
key: score_time
|
|
value: [0.01165295 0.01172495 0.01302648 0.0129962 0.0127008 0.0130198
|
|
0.01305652 0.01332688 0.01172519 0.01192451]
|
|
|
|
mean value: 0.012515425682067871
|
|
|
|
key: test_mcc
|
|
value: [0.45643546 0.40533961 0.39130435 0.43519414 0.43519414 0.56736651
|
|
0.49541508 0.69560834 0.52623481 0.52623481]
|
|
|
|
mean value: 0.4934327271836572
|
|
|
|
key: train_mcc
|
|
value: [0.66295066 0.72998103 0.6576267 0.71739923 0.67465395 0.70830403
|
|
0.67772632 0.68747778 0.67427068 0.70607487]
|
|
|
|
mean value: 0.6896465241231877
|
|
|
|
key: test_accuracy
|
|
value: [0.7173913 0.69565217 0.69565217 0.7173913 0.7173913 0.7826087
|
|
0.73913043 0.82608696 0.76086957 0.76086957]
|
|
|
|
mean value: 0.741304347826087
|
|
|
|
key: train_accuracy
|
|
value: [0.83091787 0.86231884 0.82850242 0.85748792 0.83574879 0.852657
|
|
0.83574879 0.84057971 0.83333333 0.85024155]
|
|
|
|
mean value: 0.8427536231884059
|
|
|
|
key: test_fscore
|
|
value: [0.75471698 0.73076923 0.69565217 0.71111111 0.72340426 0.79166667
|
|
0.76923077 0.85185185 0.74418605 0.74418605]
|
|
|
|
mean value: 0.7516775133017153
|
|
|
|
key: train_fscore
|
|
value: [0.83568075 0.87015945 0.8321513 0.86310905 0.84331797 0.8591224
|
|
0.84615385 0.85067873 0.84494382 0.85909091]
|
|
|
|
mean value: 0.8504408236135929
|
|
|
|
key: test_precision
|
|
value: [0.66666667 0.65517241 0.69565217 0.72727273 0.70833333 0.76
|
|
0.68965517 0.74193548 0.8 0.8 ]
|
|
|
|
mean value: 0.7244687971263635
|
|
|
|
key: train_precision
|
|
value: [0.81278539 0.82327586 0.81481481 0.83035714 0.8061674 0.82300885
|
|
0.79574468 0.8 0.78991597 0.8111588 ]
|
|
|
|
mean value: 0.8107228903828236
|
|
|
|
key: test_recall
|
|
value: [0.86956522 0.82608696 0.69565217 0.69565217 0.73913043 0.82608696
|
|
0.86956522 1. 0.69565217 0.69565217]
|
|
|
|
mean value: 0.7913043478260869
|
|
|
|
key: train_recall
|
|
value: [0.85990338 0.92270531 0.85024155 0.89855072 0.88405797 0.89855072
|
|
0.90338164 0.90821256 0.90821256 0.91304348]
|
|
|
|
mean value: 0.8946859903381643
|
|
|
|
key: test_roc_auc
|
|
value: [0.7173913 0.69565217 0.69565217 0.7173913 0.7173913 0.7826087
|
|
0.73913043 0.82608696 0.76086957 0.76086957]
|
|
|
|
mean value: 0.741304347826087
|
|
|
|
key: train_roc_auc
|
|
value: [0.83091787 0.86231884 0.82850242 0.85748792 0.83574879 0.852657
|
|
0.83574879 0.84057971 0.83333333 0.85024155]
|
|
|
|
mean value: 0.8427536231884059
|
|
|
|
key: test_jcc
|
|
value: [0.60606061 0.57575758 0.53333333 0.55172414 0.56666667 0.65517241
|
|
0.625 0.74193548 0.59259259 0.59259259]
|
|
|
|
mean value: 0.6040835402598472
|
|
|
|
key: train_jcc
|
|
value: [0.71774194 0.77016129 0.71255061 0.75918367 0.72908367 0.75303644
|
|
0.73333333 0.74015748 0.73151751 0.75298805]
|
|
|
|
mean value: 0.7399753980333583
|
|
|
|
MCC on Blind test: 0.43
|
|
|
|
Accuracy on Blind test: 0.71
|
|
|
|
Model_name: MLP
|
|
Model func: MLPClassifier(max_iter=500, random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MLPClassifier(max_iter=500, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.48387837 1.04218054 0.88318419 0.63158584 1.02904654 1.14515352
|
|
1.52221656 0.689291 1.10090923 1.16517997]
|
|
|
|
mean value: 1.0692625761032104
|
|
|
|
key: score_time
|
|
value: [0.01243424 0.01250696 0.01239061 0.01240277 0.01242542 0.01239514
|
|
0.01235199 0.01239777 0.01241636 0.01621032]
|
|
|
|
mean value: 0.012793159484863282
|
|
|
|
key: test_mcc
|
|
value: [0.4454354 0.47245559 0.52623481 0.48007936 0.47826087 0.43519414
|
|
0.57396402 0.4454354 0.53452248 0.6092718 ]
|
|
|
|
mean value: 0.5000853878258789
|
|
|
|
key: train_mcc
|
|
value: [0.83310808 0.72761502 0.74082672 0.64112383 0.79272392 0.77447567
|
|
0.86965986 0.67772632 0.74562472 0.80533198]
|
|
|
|
mean value: 0.7608216120675093
|
|
|
|
key: test_accuracy
|
|
value: [0.7173913 0.7173913 0.76086957 0.73913043 0.73913043 0.7173913
|
|
0.7826087 0.7173913 0.76086957 0.80434783]
|
|
|
|
mean value: 0.7456521739130435
|
|
|
|
key: train_accuracy
|
|
value: [0.91545894 0.85024155 0.86956522 0.80434783 0.89613527 0.88647343
|
|
0.93236715 0.83574879 0.85990338 0.90096618]
|
|
|
|
mean value: 0.87512077294686
|
|
|
|
key: test_fscore
|
|
value: [0.74509804 0.76363636 0.74418605 0.75 0.73913043 0.71111111
|
|
0.8 0.74509804 0.78431373 0.8 ]
|
|
|
|
mean value: 0.7582573759963279
|
|
|
|
key: train_fscore
|
|
value: [0.91841492 0.86808511 0.865 0.8308977 0.89786223 0.88992974
|
|
0.93577982 0.84615385 0.87606838 0.90531178]
|
|
|
|
mean value: 0.883350352054179
|
|
|
|
key: test_precision
|
|
value: [0.67857143 0.65625 0.8 0.72 0.73913043 0.72727273
|
|
0.74074074 0.67857143 0.71428571 0.81818182]
|
|
|
|
mean value: 0.7273004292406466
|
|
|
|
key: train_precision
|
|
value: [0.88738739 0.7756654 0.89637306 0.73161765 0.88317757 0.86363636
|
|
0.89082969 0.79574468 0.78544061 0.86725664]
|
|
|
|
mean value: 0.8377129049779565
|
|
|
|
key: test_recall
|
|
value: [0.82608696 0.91304348 0.69565217 0.7826087 0.73913043 0.69565217
|
|
0.86956522 0.82608696 0.86956522 0.7826087 ]
|
|
|
|
mean value: 0.8
|
|
|
|
key: train_recall
|
|
value: [0.95169082 0.98550725 0.83574879 0.96135266 0.91304348 0.9178744
|
|
0.98550725 0.90338164 0.99033816 0.9468599 ]
|
|
|
|
mean value: 0.9391304347826087
|
|
|
|
key: test_roc_auc
|
|
value: [0.7173913 0.7173913 0.76086957 0.73913043 0.73913043 0.7173913
|
|
0.7826087 0.7173913 0.76086957 0.80434783]
|
|
|
|
mean value: 0.7456521739130435
|
|
|
|
key: train_roc_auc
|
|
value: [0.91545894 0.85024155 0.86956522 0.80434783 0.89613527 0.88647343
|
|
0.93236715 0.83574879 0.85990338 0.90096618]
|
|
|
|
mean value: 0.87512077294686
|
|
|
|
key: test_jcc
|
|
value: [0.59375 0.61764706 0.59259259 0.6 0.5862069 0.55172414
|
|
0.66666667 0.59375 0.64516129 0.66666667]
|
|
|
|
mean value: 0.6114165309554794
|
|
|
|
key: train_jcc
|
|
value: [0.84913793 0.76691729 0.76211454 0.71071429 0.81465517 0.80168776
|
|
0.87931034 0.73333333 0.77946768 0.82700422]
|
|
|
|
mean value: 0.7924342561732226
|
|
|
|
MCC on Blind test: 0.36
|
|
|
|
Accuracy on Blind test: 0.69
|
|
|
|
Model_name: Decision Tree
|
|
Model func: DecisionTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', DecisionTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02884746 0.02460098 0.02956152 0.0261054 0.02656984 0.02446222
|
|
0.02462029 0.02391934 0.02574706 0.02467108]
|
|
|
|
mean value: 0.02591052055358887
|
|
|
|
key: score_time
|
|
value: [0.01191378 0.00934911 0.00957465 0.00892711 0.00892615 0.00891709
|
|
0.00895023 0.00892878 0.0089879 0.00895309]
|
|
|
|
mean value: 0.009342789649963379
|
|
|
|
key: test_mcc
|
|
value: [0.74194083 0.62360956 0.6092718 0.78935222 0.82608696 0.78935222
|
|
0.75056834 0.65465367 0.69631062 0.91304348]
|
|
|
|
mean value: 0.7394189686845861
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.86956522 0.80434783 0.80434783 0.89130435 0.91304348 0.89130435
|
|
0.86956522 0.82608696 0.84782609 0.95652174]
|
|
|
|
mean value: 0.8673913043478261
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.875 0.82352941 0.80851064 0.88372093 0.91304348 0.89795918
|
|
0.88 0.83333333 0.85106383 0.95652174]
|
|
|
|
mean value: 0.8722682544480478
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.84 0.75 0.79166667 0.95 0.91304348 0.84615385
|
|
0.81481481 0.8 0.83333333 0.95652174]
|
|
|
|
mean value: 0.8495533878359965
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.91304348 0.91304348 0.82608696 0.82608696 0.91304348 0.95652174
|
|
0.95652174 0.86956522 0.86956522 0.95652174]
|
|
|
|
mean value: 0.9
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.86956522 0.80434783 0.80434783 0.89130435 0.91304348 0.89130435
|
|
0.86956522 0.82608696 0.84782609 0.95652174]
|
|
|
|
mean value: 0.8673913043478261
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.77777778 0.7 0.67857143 0.79166667 0.84 0.81481481
|
|
0.78571429 0.71428571 0.74074074 0.91666667]
|
|
|
|
mean value: 0.7760238095238094
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.56
|
|
|
|
Accuracy on Blind test: 0.78
|
|
|
|
Model_name: Extra Trees
|
|
Model func: ExtraTreesClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreesClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.12353754 0.12308574 0.12323904 0.12355566 0.12350321 0.12364197
|
|
0.12468982 0.1238718 0.12950373 0.12978148]
|
|
|
|
mean value: 0.12484099864959716
|
|
|
|
key: score_time
|
|
value: [0.01787996 0.01805329 0.01808262 0.0181849 0.01806045 0.01811862
|
|
0.0181942 0.0181005 0.01856756 0.01836109]
|
|
|
|
mean value: 0.018160319328308104
|
|
|
|
key: test_mcc
|
|
value: [0.56736651 0.24140227 0.43852901 0.61394061 0.52223297 0.48007936
|
|
0.52623481 0.4454354 0.48007936 0.48007936]
|
|
|
|
mean value: 0.47953796707708835
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.7826087 0.60869565 0.7173913 0.80434783 0.76086957 0.73913043
|
|
0.76086957 0.7173913 0.73913043 0.73913043]
|
|
|
|
mean value: 0.7369565217391304
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.79166667 0.67857143 0.69767442 0.79069767 0.76595745 0.72727273
|
|
0.74418605 0.74509804 0.72727273 0.72727273]
|
|
|
|
mean value: 0.7395669902615358
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.76 0.57575758 0.75 0.85 0.75 0.76190476
|
|
0.8 0.67857143 0.76190476 0.76190476]
|
|
|
|
mean value: 0.745004329004329
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.82608696 0.82608696 0.65217391 0.73913043 0.7826087 0.69565217
|
|
0.69565217 0.82608696 0.69565217 0.69565217]
|
|
|
|
mean value: 0.7434782608695653
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.7826087 0.60869565 0.7173913 0.80434783 0.76086957 0.73913043
|
|
0.76086957 0.7173913 0.73913043 0.73913043]
|
|
|
|
mean value: 0.7369565217391304
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.65517241 0.51351351 0.53571429 0.65384615 0.62068966 0.57142857
|
|
0.59259259 0.59375 0.57142857 0.57142857]
|
|
|
|
mean value: 0.5879564328917777
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.46
|
|
|
|
Accuracy on Blind test: 0.74
|
|
|
|
Model_name: Extra Tree
|
|
Model func: ExtraTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01158834 0.01016188 0.01072598 0.01104307 0.01108384 0.01038527
|
|
0.01136303 0.01132059 0.01050973 0.01084805]
|
|
|
|
mean value: 0.010902976989746094
|
|
|
|
key: score_time
|
|
value: [0.00946069 0.00910878 0.00992489 0.00964308 0.00900888 0.00962877
|
|
0.00933528 0.0089159 0.00932121 0.0091238 ]
|
|
|
|
mean value: 0.009347128868103027
|
|
|
|
key: test_mcc
|
|
value: [ 0.35082321 -0.04347826 0.26726124 0.17407766 0.39735971 0.26311741
|
|
0.49541508 0.13043478 0.22518867 0.30434783]
|
|
|
|
mean value: 0.2564547324906315
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.67391304 0.47826087 0.63043478 0.58695652 0.69565217 0.63043478
|
|
0.73913043 0.56521739 0.60869565 0.65217391]
|
|
|
|
mean value: 0.6260869565217392
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.65116279 0.47826087 0.66666667 0.57777778 0.66666667 0.65306122
|
|
0.76923077 0.56521739 0.55 0.65217391]
|
|
|
|
mean value: 0.6230218069442394
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.7 0.47826087 0.60714286 0.59090909 0.73684211 0.61538462
|
|
0.68965517 0.56521739 0.64705882 0.65217391]
|
|
|
|
mean value: 0.628264483855597
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.60869565 0.47826087 0.73913043 0.56521739 0.60869565 0.69565217
|
|
0.86956522 0.56521739 0.47826087 0.65217391]
|
|
|
|
mean value: 0.6260869565217391
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.67391304 0.47826087 0.63043478 0.58695652 0.69565217 0.63043478
|
|
0.73913043 0.56521739 0.60869565 0.65217391]
|
|
|
|
mean value: 0.6260869565217391
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.48275862 0.31428571 0.5 0.40625 0.5 0.48484848
|
|
0.625 0.39393939 0.37931034 0.48387097]
|
|
|
|
mean value: 0.457026352633277
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.23
|
|
|
|
Accuracy on Blind test: 0.62
|
|
|
|
Model_name: Random Forest
|
|
Model func: RandomForestClassifier(n_estimators=1000, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(n_estimators=1000, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.75904489 1.80482984 1.76845455 1.77710342 1.78683972 1.7867794
|
|
1.81268549 1.771945 1.7612524 1.76990128]
|
|
|
|
mean value: 1.7798835992813111
|
|
|
|
key: score_time
|
|
value: [0.09351516 0.10056996 0.09473276 0.09771657 0.09969068 0.10154796
|
|
0.10460901 0.09457302 0.09260321 0.15258074]
|
|
|
|
mean value: 0.10321390628814697
|
|
|
|
key: test_mcc
|
|
value: [0.91651514 0.56694671 0.56521739 0.78935222 0.78334945 0.74194083
|
|
0.74194083 0.52223297 0.75056834 0.78334945]
|
|
|
|
mean value: 0.7161413317928996
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.95652174 0.76086957 0.7826087 0.89130435 0.89130435 0.86956522
|
|
0.86956522 0.76086957 0.86956522 0.89130435]
|
|
|
|
mean value: 0.8543478260869565
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.95833333 0.8 0.7826087 0.88372093 0.89361702 0.875
|
|
0.875 0.76595745 0.85714286 0.88888889]
|
|
|
|
mean value: 0.8580269173334918
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.92 0.6875 0.7826087 0.95 0.875 0.84
|
|
0.84 0.75 0.94736842 0.90909091]
|
|
|
|
mean value: 0.8501568025795715
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 0.95652174 0.7826087 0.82608696 0.91304348 0.91304348
|
|
0.91304348 0.7826087 0.7826087 0.86956522]
|
|
|
|
mean value: 0.8739130434782608
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.95652174 0.76086957 0.7826087 0.89130435 0.89130435 0.86956522
|
|
0.86956522 0.76086957 0.86956522 0.89130435]
|
|
|
|
mean value: 0.8543478260869566
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.92 0.66666667 0.64285714 0.79166667 0.80769231 0.77777778
|
|
0.77777778 0.62068966 0.75 0.8 ]
|
|
|
|
mean value: 0.7555127994610753
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.63
|
|
|
|
Accuracy on Blind test: 0.81
|
|
|
|
Model_name: Random Forest2
|
|
Model func: RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000...05', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10,
|
|
oob_score=True, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.9309938 0.97561812 0.97212815 0.9395082 0.96008706 0.96088171
|
|
0.955127 0.95128441 0.90736485 0.98695827]
|
|
|
|
mean value: 0.953995156288147
|
|
|
|
key: score_time
|
|
value: [0.22832513 0.25179029 0.13333225 0.21760082 0.20836687 0.2838335
|
|
0.25420737 0.27094269 0.22470903 0.20994496]
|
|
|
|
mean value: 0.2283052921295166
|
|
|
|
key: test_mcc
|
|
value: [0.87705802 0.54772256 0.65217391 0.82922798 0.73913043 0.74194083
|
|
0.78334945 0.61394061 0.78935222 0.78334945]
|
|
|
|
mean value: 0.7357245468829955
|
|
|
|
key: train_mcc
|
|
value: [0.93275907 0.93306423 0.94273329 0.93773439 0.92780693 0.93773439
|
|
0.92841417 0.94242496 0.91372611 0.92806703]
|
|
|
|
mean value: 0.9324464576360868
|
|
|
|
key: test_accuracy
|
|
value: [0.93478261 0.76086957 0.82608696 0.91304348 0.86956522 0.86956522
|
|
0.89130435 0.80434783 0.89130435 0.89130435]
|
|
|
|
mean value: 0.8652173913043478
|
|
|
|
key: train_accuracy
|
|
value: [0.96618357 0.96618357 0.97101449 0.96859903 0.96376812 0.96859903
|
|
0.96376812 0.97101449 0.95652174 0.96376812]
|
|
|
|
mean value: 0.9659420289855072
|
|
|
|
key: test_fscore
|
|
value: [0.93877551 0.79245283 0.82608696 0.90909091 0.86956522 0.875
|
|
0.89361702 0.81632653 0.88372093 0.88888889]
|
|
|
|
mean value: 0.8693524794407002
|
|
|
|
key: train_fscore
|
|
value: [0.96666667 0.96682464 0.97156398 0.96912114 0.96420048 0.96912114
|
|
0.96453901 0.97142857 0.95734597 0.96437055]
|
|
|
|
mean value: 0.9665182146274129
|
|
|
|
key: test_precision
|
|
value: [0.88461538 0.7 0.82608696 0.95238095 0.86956522 0.84
|
|
0.875 0.76923077 0.95 0.90909091]
|
|
|
|
mean value: 0.8575970189231059
|
|
|
|
key: train_precision
|
|
value: [0.95305164 0.94883721 0.95348837 0.95327103 0.95283019 0.95327103
|
|
0.94444444 0.95774648 0.93953488 0.94859813]
|
|
|
|
mean value: 0.9505073407221585
|
|
|
|
key: test_recall
|
|
value: [1. 0.91304348 0.82608696 0.86956522 0.86956522 0.91304348
|
|
0.91304348 0.86956522 0.82608696 0.86956522]
|
|
|
|
mean value: 0.8869565217391304
|
|
|
|
key: train_recall
|
|
value: [0.98067633 0.98550725 0.99033816 0.98550725 0.97584541 0.98550725
|
|
0.98550725 0.98550725 0.97584541 0.98067633]
|
|
|
|
mean value: 0.9830917874396136
|
|
|
|
key: test_roc_auc
|
|
value: [0.93478261 0.76086957 0.82608696 0.91304348 0.86956522 0.86956522
|
|
0.89130435 0.80434783 0.89130435 0.89130435]
|
|
|
|
mean value: 0.8652173913043478
|
|
|
|
key: train_roc_auc
|
|
value: [0.96618357 0.96618357 0.97101449 0.96859903 0.96376812 0.96859903
|
|
0.96376812 0.97101449 0.95652174 0.96376812]
|
|
|
|
mean value: 0.9659420289855072
|
|
|
|
key: test_jcc
|
|
value: [0.88461538 0.65625 0.7037037 0.83333333 0.76923077 0.77777778
|
|
0.80769231 0.68965517 0.79166667 0.8 ]
|
|
|
|
mean value: 0.7713925115433736
|
|
|
|
key: train_jcc
|
|
value: [0.93548387 0.93577982 0.94470046 0.94009217 0.93087558 0.94009217
|
|
0.93150685 0.94444444 0.91818182 0.93119266]
|
|
|
|
mean value: 0.9352349828636888
|
|
|
|
MCC on Blind test: 0.65
|
|
|
|
Accuracy on Blind test: 0.83
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.02340174 0.00980616 0.00984836 0.00980854 0.01029038 0.01004672
|
|
0.0101068 0.01000142 0.00982833 0.00993443]
|
|
|
|
mean value: 0.011307287216186523
|
|
|
|
key: score_time
|
|
value: [0.01417089 0.00880098 0.00897098 0.00886726 0.00935221 0.00891757
|
|
0.00887465 0.0089643 0.00900984 0.008883 ]
|
|
|
|
mean value: 0.009481167793273926
|
|
|
|
key: test_mcc
|
|
value: [0.26311741 0.26111648 0.30434783 0.30905755 0.34815531 0.4454354
|
|
0.48566186 0.41736501 0.47826087 0.43519414]
|
|
|
|
mean value: 0.37477118607890214
|
|
|
|
key: train_mcc
|
|
value: [0.4977937 0.54289671 0.52179393 0.51693234 0.51729468 0.49582377
|
|
0.4882875 0.47503462 0.49992343 0.47370088]
|
|
|
|
mean value: 0.5029481566309334
|
|
|
|
key: test_accuracy
|
|
value: [0.63043478 0.63043478 0.65217391 0.65217391 0.67391304 0.7173913
|
|
0.73913043 0.69565217 0.73913043 0.7173913 ]
|
|
|
|
mean value: 0.6847826086956521
|
|
|
|
key: train_accuracy
|
|
value: [0.74879227 0.7705314 0.76086957 0.75845411 0.75845411 0.74637681
|
|
0.74396135 0.73671498 0.74879227 0.73671498]
|
|
|
|
mean value: 0.7509661835748792
|
|
|
|
key: test_fscore
|
|
value: [0.65306122 0.63829787 0.65217391 0.61904762 0.66666667 0.74509804
|
|
0.76 0.74074074 0.73913043 0.71111111]
|
|
|
|
mean value: 0.6925327621438132
|
|
|
|
key: train_fscore
|
|
value: [0.75238095 0.77958237 0.76258993 0.75728155 0.76303318 0.7597254
|
|
0.74881517 0.74709977 0.76036866 0.73218673]
|
|
|
|
mean value: 0.7563063705878426
|
|
|
|
key: test_precision
|
|
value: [0.61538462 0.625 0.65217391 0.68421053 0.68181818 0.67857143
|
|
0.7037037 0.64516129 0.73913043 0.72727273]
|
|
|
|
mean value: 0.6752426821215114
|
|
|
|
key: train_precision
|
|
value: [0.74178404 0.75 0.75714286 0.76097561 0.74883721 0.72173913
|
|
0.73488372 0.71875 0.72687225 0.745 ]
|
|
|
|
mean value: 0.7405984811821016
|
|
|
|
key: test_recall
|
|
value: [0.69565217 0.65217391 0.65217391 0.56521739 0.65217391 0.82608696
|
|
0.82608696 0.86956522 0.73913043 0.69565217]
|
|
|
|
mean value: 0.717391304347826
|
|
|
|
key: train_recall
|
|
value: [0.76328502 0.8115942 0.76811594 0.75362319 0.77777778 0.80193237
|
|
0.76328502 0.77777778 0.79710145 0.71980676]
|
|
|
|
mean value: 0.7734299516908213
|
|
|
|
key: test_roc_auc
|
|
value: [0.63043478 0.63043478 0.65217391 0.65217391 0.67391304 0.7173913
|
|
0.73913043 0.69565217 0.73913043 0.7173913 ]
|
|
|
|
mean value: 0.6847826086956521
|
|
|
|
key: train_roc_auc
|
|
value: [0.74879227 0.7705314 0.76086957 0.75845411 0.75845411 0.74637681
|
|
0.74396135 0.73671498 0.74879227 0.73671498]
|
|
|
|
mean value: 0.7509661835748792
|
|
|
|
key: test_jcc
|
|
value: [0.48484848 0.46875 0.48387097 0.44827586 0.5 0.59375
|
|
0.61290323 0.58823529 0.5862069 0.55172414]
|
|
|
|
mean value: 0.5318564869066243
|
|
|
|
key: train_jcc
|
|
value: [0.60305344 0.63878327 0.61627907 0.609375 0.61685824 0.61254613
|
|
0.59848485 0.5962963 0.6133829 0.57751938]
|
|
|
|
mean value: 0.6082578562107429
|
|
|
|
MCC on Blind test: 0.35
|
|
|
|
Accuracy on Blind test: 0.68
|
|
|
|
Model_name: XGBoost
|
|
Model func: XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000...
|
|
interaction_constraints=None, learning_rate=None,
|
|
max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan,
|
|
monotone_constraints=None, n_estimators=100,
|
|
n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None,
|
|
reg_lambda=None, scale_pos_weight=None,
|
|
subsample=None, tree_method=None,
|
|
use_label_encoder=False,
|
|
validate_parameters=None, verbosity=0))])
|
|
|
|
key: fit_time
|
|
value: [0.08936453 0.07846737 0.07669997 0.08492184 0.08044219 0.08309031
|
|
0.10936308 0.0800128 0.08465981 0.22779965]
|
|
|
|
mean value: 0.09948215484619141
|
|
|
|
key: score_time
|
|
value: [0.01102686 0.01109076 0.01118302 0.01103806 0.01191735 0.01103616
|
|
0.01134872 0.01110458 0.01118922 0.01178503]
|
|
|
|
mean value: 0.011271977424621582
|
|
|
|
key: test_mcc
|
|
value: [0.87038828 0.74194083 0.65465367 0.73913043 0.87038828 0.82922798
|
|
0.87705802 0.61394061 0.74194083 1. ]
|
|
|
|
mean value: 0.7938668934371033
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.93478261 0.86956522 0.82608696 0.86956522 0.93478261 0.91304348
|
|
0.93478261 0.80434783 0.86956522 1. ]
|
|
|
|
mean value: 0.8956521739130435
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.93617021 0.875 0.83333333 0.86956522 0.93333333 0.91666667
|
|
0.93877551 0.81632653 0.86363636 1. ]
|
|
|
|
mean value: 0.8982807167943285
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.91666667 0.84 0.8 0.86956522 0.95454545 0.88
|
|
0.88461538 0.76923077 0.9047619 1. ]
|
|
|
|
mean value: 0.8819385397211484
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.95652174 0.91304348 0.86956522 0.86956522 0.91304348 0.95652174
|
|
1. 0.86956522 0.82608696 1. ]
|
|
|
|
mean value: 0.9173913043478261
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.93478261 0.86956522 0.82608696 0.86956522 0.93478261 0.91304348
|
|
0.93478261 0.80434783 0.86956522 1. ]
|
|
|
|
mean value: 0.8956521739130434
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.88 0.77777778 0.71428571 0.76923077 0.875 0.84615385
|
|
0.88461538 0.68965517 0.76 1. ]
|
|
|
|
mean value: 0.8196718664477285
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.68
|
|
|
|
Accuracy on Blind test: 0.84
|
|
|
|
Model_name: LDA
|
|
Model func: LinearDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LinearDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.04487348 0.08185077 0.05841994 0.07512474 0.07208538 0.08272529
|
|
0.06066704 0.03999424 0.04051352 0.0768981 ]
|
|
|
|
mean value: 0.06331524848937989
|
|
|
|
key: score_time
|
|
value: [0.02183747 0.02446556 0.02108097 0.01225448 0.02332473 0.02089667
|
|
0.01228571 0.01221514 0.01829672 0.02186108]
|
|
|
|
mean value: 0.018851852416992186
|
|
|
|
key: test_mcc
|
|
value: [0.65465367 0.37796447 0.43519414 0.61394061 0.48007936 0.48007936
|
|
0.56521739 0.57396402 0.52223297 0.69631062]
|
|
|
|
mean value: 0.5399636618548821
|
|
|
|
key: train_mcc
|
|
value: [0.79844422 0.83610009 0.85048969 0.79302043 0.84082503 0.86039548
|
|
0.83116038 0.82149572 0.85507246 0.81737947]
|
|
|
|
mean value: 0.8304382976209043
|
|
|
|
key: test_accuracy
|
|
value: [0.82608696 0.67391304 0.7173913 0.80434783 0.73913043 0.73913043
|
|
0.7826087 0.7826087 0.76086957 0.84782609]
|
|
|
|
mean value: 0.7673913043478261
|
|
|
|
key: train_accuracy
|
|
value: [0.89855072 0.9178744 0.92512077 0.89613527 0.92028986 0.92995169
|
|
0.91545894 0.91062802 0.92753623 0.90821256]
|
|
|
|
mean value: 0.914975845410628
|
|
|
|
key: test_fscore
|
|
value: [0.83333333 0.72727273 0.71111111 0.81632653 0.75 0.75
|
|
0.7826087 0.8 0.75555556 0.85106383]
|
|
|
|
mean value: 0.777727178332438
|
|
|
|
key: train_fscore
|
|
value: [0.90140845 0.91904762 0.92601432 0.89834515 0.92124105 0.93111639
|
|
0.91646778 0.91169451 0.92753623 0.91037736]
|
|
|
|
mean value: 0.9163248864437317
|
|
|
|
key: test_precision
|
|
value: [0.8 0.625 0.72727273 0.76923077 0.72 0.72
|
|
0.7826087 0.74074074 0.77272727 0.83333333]
|
|
|
|
mean value: 0.7490913538957017
|
|
|
|
key: train_precision
|
|
value: [0.87671233 0.90610329 0.91509434 0.87962963 0.91037736 0.91588785
|
|
0.90566038 0.9009434 0.92753623 0.88940092]
|
|
|
|
mean value: 0.9027345720490176
|
|
|
|
key: test_recall
|
|
value: [0.86956522 0.86956522 0.69565217 0.86956522 0.7826087 0.7826087
|
|
0.7826087 0.86956522 0.73913043 0.86956522]
|
|
|
|
mean value: 0.8130434782608695
|
|
|
|
key: train_recall
|
|
value: [0.92753623 0.93236715 0.93719807 0.9178744 0.93236715 0.9468599
|
|
0.92753623 0.92270531 0.92753623 0.93236715]
|
|
|
|
mean value: 0.9304347826086956
|
|
|
|
key: test_roc_auc
|
|
value: [0.82608696 0.67391304 0.7173913 0.80434783 0.73913043 0.73913043
|
|
0.7826087 0.7826087 0.76086957 0.84782609]
|
|
|
|
mean value: 0.7673913043478261
|
|
|
|
key: train_roc_auc
|
|
value: [0.89855072 0.9178744 0.92512077 0.89613527 0.92028986 0.92995169
|
|
0.91545894 0.91062802 0.92753623 0.90821256]
|
|
|
|
mean value: 0.914975845410628
|
|
|
|
key: test_jcc
|
|
value: [0.71428571 0.57142857 0.55172414 0.68965517 0.6 0.6
|
|
0.64285714 0.66666667 0.60714286 0.74074074]
|
|
|
|
mean value: 0.6384501003466521
|
|
|
|
key: train_jcc
|
|
value: [0.82051282 0.85022026 0.86222222 0.81545064 0.8539823 0.87111111
|
|
0.84581498 0.8377193 0.86486486 0.83549784]
|
|
|
|
mean value: 0.8457396339406997
|
|
|
|
MCC on Blind test: 0.4
|
|
|
|
Accuracy on Blind test: 0.7
|
|
|
|
Model_name: Multinomial
|
|
Model func: MultinomialNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MultinomialNB())])
|
|
|
|
key: fit_time
|
|
value: [0.03631878 0.0097506 0.00945354 0.00935531 0.00948024 0.00942874
|
|
0.0097692 0.00947857 0.00951481 0.00936866]
|
|
|
|
mean value: 0.012191843986511231
|
|
|
|
key: score_time
|
|
value: [0.00945902 0.00890803 0.00869727 0.00870514 0.00875044 0.00870657
|
|
0.00897241 0.008641 0.00870872 0.00867629]
|
|
|
|
mean value: 0.008822488784790038
|
|
|
|
key: test_mcc
|
|
value: [0.35082321 0.43852901 0.43519414 0.52623481 0.43852901 0.52623481
|
|
0.71269665 0.57396402 0.48566186 0.39130435]
|
|
|
|
mean value: 0.4879171868665441
|
|
|
|
key: train_mcc
|
|
value: [0.52777851 0.54796937 0.53784095 0.53320213 0.54335651 0.54796937
|
|
0.53215286 0.52041792 0.52498987 0.50810087]
|
|
|
|
mean value: 0.5323778367643669
|
|
|
|
key: test_accuracy
|
|
value: [0.67391304 0.7173913 0.7173913 0.76086957 0.7173913 0.76086957
|
|
0.84782609 0.7826087 0.73913043 0.69565217]
|
|
|
|
mean value: 0.741304347826087
|
|
|
|
key: train_accuracy
|
|
value: [0.76328502 0.77294686 0.76811594 0.76570048 0.7705314 0.77294686
|
|
0.76570048 0.75845411 0.76086957 0.75362319]
|
|
|
|
mean value: 0.7652173913043478
|
|
|
|
key: test_fscore
|
|
value: [0.69387755 0.73469388 0.72340426 0.74418605 0.73469388 0.7755102
|
|
0.8627451 0.8 0.71428571 0.69565217]
|
|
|
|
mean value: 0.7479048798272832
|
|
|
|
key: train_fscore
|
|
value: [0.77102804 0.78240741 0.77674419 0.774942 0.78060046 0.78240741
|
|
0.77176471 0.7716895 0.77345538 0.76056338]
|
|
|
|
mean value: 0.7745602456953206
|
|
|
|
key: test_precision
|
|
value: [0.65384615 0.69230769 0.70833333 0.8 0.69230769 0.73076923
|
|
0.78571429 0.74074074 0.78947368 0.69565217]
|
|
|
|
mean value: 0.7289144987142698
|
|
|
|
key: train_precision
|
|
value: [0.74660633 0.75111111 0.74887892 0.74553571 0.74778761 0.75111111
|
|
0.75229358 0.73160173 0.73478261 0.73972603]
|
|
|
|
mean value: 0.7449434751412146
|
|
|
|
key: test_recall
|
|
value: [0.73913043 0.7826087 0.73913043 0.69565217 0.7826087 0.82608696
|
|
0.95652174 0.86956522 0.65217391 0.69565217]
|
|
|
|
mean value: 0.7739130434782608
|
|
|
|
key: train_recall
|
|
value: [0.79710145 0.81642512 0.80676329 0.80676329 0.81642512 0.81642512
|
|
0.79227053 0.81642512 0.81642512 0.7826087 ]
|
|
|
|
mean value: 0.8067632850241546
|
|
|
|
key: test_roc_auc
|
|
value: [0.67391304 0.7173913 0.7173913 0.76086957 0.7173913 0.76086957
|
|
0.84782609 0.7826087 0.73913043 0.69565217]
|
|
|
|
mean value: 0.7413043478260869
|
|
|
|
key: train_roc_auc
|
|
value: [0.76328502 0.77294686 0.76811594 0.76570048 0.7705314 0.77294686
|
|
0.76570048 0.75845411 0.76086957 0.75362319]
|
|
|
|
mean value: 0.7652173913043478
|
|
|
|
key: test_jcc
|
|
value: [0.53125 0.58064516 0.56666667 0.59259259 0.58064516 0.63333333
|
|
0.75862069 0.66666667 0.55555556 0.53333333]
|
|
|
|
mean value: 0.5999309160383965
|
|
|
|
key: train_jcc
|
|
value: [0.62737643 0.64258555 0.63498099 0.63257576 0.64015152 0.64258555
|
|
0.62835249 0.62825279 0.63059701 0.61363636]
|
|
|
|
mean value: 0.6321094446924821
|
|
|
|
MCC on Blind test: 0.39
|
|
|
|
Accuracy on Blind test: 0.7
|
|
|
|
Model_name: Passive Aggresive
|
|
Model func: PassiveAggressiveClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
PassiveAggressiveClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01311207 0.01989889 0.01874471 0.01914358 0.01592255 0.01623392
|
|
0.02147722 0.03374481 0.03179073 0.01620793]
|
|
|
|
mean value: 0.020627641677856447
|
|
|
|
key: score_time
|
|
value: [0.0086832 0.01111603 0.01121593 0.01169395 0.01164913 0.01165795
|
|
0.01315379 0.01208568 0.01410437 0.01168466]
|
|
|
|
mean value: 0.011704468727111816
|
|
|
|
key: test_mcc
|
|
value: [0.60286056 0.35082321 0.2548236 0.48566186 0.56736651 0.53452248
|
|
0.70164642 0.56521739 0.65465367 0.34921515]
|
|
|
|
mean value: 0.5066790855968193
|
|
|
|
key: train_mcc
|
|
value: [0.66527661 0.7069807 0.59860241 0.73437665 0.71233921 0.62501968
|
|
0.66898551 0.77302805 0.66470211 0.33900469]
|
|
|
|
mean value: 0.6488315629359197
|
|
|
|
key: test_accuracy
|
|
value: [0.7826087 0.67391304 0.60869565 0.73913043 0.7826087 0.76086957
|
|
0.84782609 0.7826087 0.82608696 0.60869565]
|
|
|
|
mean value: 0.741304347826087
|
|
|
|
key: train_accuracy
|
|
value: [0.81884058 0.85024155 0.76811594 0.86714976 0.852657 0.80434783
|
|
0.82850242 0.88647343 0.82608696 0.61594203]
|
|
|
|
mean value: 0.8118357487922705
|
|
|
|
key: test_fscore
|
|
value: [0.81481481 0.69387755 0.47058824 0.71428571 0.79166667 0.73170732
|
|
0.85714286 0.7826087 0.83333333 0.35714286]
|
|
|
|
mean value: 0.7047168042426114
|
|
|
|
key: train_fscore
|
|
value: [0.84143763 0.83937824 0.70186335 0.86618005 0.86230248 0.77929155
|
|
0.84326711 0.88564477 0.84140969 0.39543726]
|
|
|
|
mean value: 0.7856212140391424
|
|
|
|
key: test_precision
|
|
value: [0.70967742 0.65384615 0.72727273 0.78947368 0.76 0.83333333
|
|
0.80769231 0.7826087 0.8 1. ]
|
|
|
|
mean value: 0.7863904321362061
|
|
|
|
key: train_precision
|
|
value: [0.7481203 0.90502793 0.9826087 0.87254902 0.80932203 0.89375
|
|
0.77642276 0.89215686 0.77327935 0.92857143]
|
|
|
|
mean value: 0.8581808390641985
|
|
|
|
key: test_recall
|
|
value: [0.95652174 0.73913043 0.34782609 0.65217391 0.82608696 0.65217391
|
|
0.91304348 0.7826087 0.86956522 0.2173913 ]
|
|
|
|
mean value: 0.6956521739130435
|
|
|
|
key: train_recall
|
|
value: [0.96135266 0.7826087 0.54589372 0.85990338 0.92270531 0.69082126
|
|
0.92270531 0.87922705 0.92270531 0.25120773]
|
|
|
|
mean value: 0.7739130434782608
|
|
|
|
key: test_roc_auc
|
|
value: [0.7826087 0.67391304 0.60869565 0.73913043 0.7826087 0.76086957
|
|
0.84782609 0.7826087 0.82608696 0.60869565]
|
|
|
|
mean value: 0.741304347826087
|
|
|
|
key: train_roc_auc
|
|
value: [0.81884058 0.85024155 0.76811594 0.86714976 0.852657 0.80434783
|
|
0.82850242 0.88647343 0.82608696 0.61594203]
|
|
|
|
mean value: 0.8118357487922705
|
|
|
|
key: test_jcc
|
|
value: [0.6875 0.53125 0.30769231 0.55555556 0.65517241 0.57692308
|
|
0.75 0.64285714 0.71428571 0.2173913 ]
|
|
|
|
mean value: 0.5638627515454727
|
|
|
|
key: train_jcc
|
|
value: [0.72627737 0.72321429 0.54066986 0.7639485 0.75793651 0.63839286
|
|
0.72900763 0.79475983 0.72623574 0.2464455 ]
|
|
|
|
mean value: 0.6646888075360328
|
|
|
|
MCC on Blind test: 0.45
|
|
|
|
Accuracy on Blind test: 0.73
|
|
|
|
Model_name: Stochastic GDescent
|
|
Model func: SGDClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SGDClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02258778 0.02051401 0.02079272 0.02237797 0.02512527 0.01947999
|
|
0.02412772 0.02076626 0.02018929 0.02270794]
|
|
|
|
mean value: 0.021866893768310545
|
|
|
|
key: score_time
|
|
value: [0.01178718 0.01172805 0.01170397 0.01167297 0.01199746 0.01166201
|
|
0.01171231 0.01165533 0.01168275 0.01171136]
|
|
|
|
mean value: 0.011731338500976563
|
|
|
|
key: test_mcc
|
|
value: [0.62764591 0.45643546 0.65465367 0.69560834 0.52704628 0.47826087
|
|
0.42163702 0.53452248 0.56736651 0.57396402]
|
|
|
|
mean value: 0.5537140580845729
|
|
|
|
key: train_mcc
|
|
value: [0.67561452 0.76901382 0.76940988 0.77019104 0.5115435 0.73971411
|
|
0.41968603 0.67396079 0.77317244 0.76329393]
|
|
|
|
mean value: 0.6865600066934703
|
|
|
|
key: test_accuracy
|
|
value: [0.7826087 0.7173913 0.82608696 0.82608696 0.7173913 0.73913043
|
|
0.67391304 0.76086957 0.7826087 0.7826087 ]
|
|
|
|
mean value: 0.7608695652173914
|
|
|
|
key: train_accuracy
|
|
value: [0.82125604 0.88405797 0.88405797 0.87922705 0.71014493 0.86714976
|
|
0.64975845 0.81884058 0.88647343 0.88164251]
|
|
|
|
mean value: 0.8282608695652174
|
|
|
|
key: test_fscore
|
|
value: [0.72222222 0.75471698 0.81818182 0.78947368 0.60606061 0.73913043
|
|
0.74576271 0.73170732 0.77272727 0.76190476]
|
|
|
|
mean value: 0.7441887810159469
|
|
|
|
key: train_fscore
|
|
value: [0.78857143 0.88679245 0.88059701 0.86772487 0.59459459 0.87471526
|
|
0.74060823 0.78386167 0.88782816 0.88192771]
|
|
|
|
mean value: 0.8187221394190056
|
|
|
|
key: test_precision
|
|
value: [1. 0.66666667 0.85714286 1. 1. 0.73913043
|
|
0.61111111 0.83333333 0.80952381 0.84210526]
|
|
|
|
mean value: 0.8359013475718281
|
|
|
|
key: train_precision
|
|
value: [0.96503497 0.86635945 0.90769231 0.95906433 0.98876404 0.82758621
|
|
0.58806818 0.97142857 0.87735849 0.87980769]
|
|
|
|
mean value: 0.8831164235178116
|
|
|
|
key: test_recall
|
|
value: [0.56521739 0.86956522 0.7826087 0.65217391 0.43478261 0.73913043
|
|
0.95652174 0.65217391 0.73913043 0.69565217]
|
|
|
|
mean value: 0.7086956521739131
|
|
|
|
key: train_recall
|
|
value: [0.66666667 0.90821256 0.85507246 0.79227053 0.42512077 0.92753623
|
|
1. 0.65700483 0.89855072 0.88405797]
|
|
|
|
mean value: 0.8014492753623188
|
|
|
|
key: test_roc_auc
|
|
value: [0.7826087 0.7173913 0.82608696 0.82608696 0.7173913 0.73913043
|
|
0.67391304 0.76086957 0.7826087 0.7826087 ]
|
|
|
|
mean value: 0.7608695652173912
|
|
|
|
key: train_roc_auc
|
|
value: [0.82125604 0.88405797 0.88405797 0.87922705 0.71014493 0.86714976
|
|
0.64975845 0.81884058 0.88647343 0.88164251]
|
|
|
|
mean value: 0.8282608695652174
|
|
|
|
key: test_jcc
|
|
value: [0.56521739 0.60606061 0.69230769 0.65217391 0.43478261 0.5862069
|
|
0.59459459 0.57692308 0.62962963 0.61538462]
|
|
|
|
mean value: 0.5953281024495417
|
|
|
|
key: train_jcc
|
|
value: [0.6509434 0.79661017 0.78666667 0.76635514 0.42307692 0.77732794
|
|
0.58806818 0.64454976 0.79828326 0.7887931 ]
|
|
|
|
mean value: 0.7020674540973326
|
|
|
|
MCC on Blind test: 0.38
|
|
|
|
Accuracy on Blind test: 0.7
|
|
|
|
Model_name: AdaBoost Classifier
|
|
Model func: AdaBoostClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', AdaBoostClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.17487621 0.16196847 0.16231751 0.16288209 0.16240931 0.16275215
|
|
0.16204476 0.16248679 0.16236567 0.16225672]
|
|
|
|
mean value: 0.1636359691619873
|
|
|
|
key: score_time
|
|
value: [0.01524496 0.01564002 0.01540661 0.01534367 0.01538467 0.01562142
|
|
0.01539731 0.01527524 0.0153079 0.01526213]
|
|
|
|
mean value: 0.015388393402099609
|
|
|
|
key: test_mcc
|
|
value: [0.78334945 0.74194083 0.65465367 0.6092718 0.87038828 0.78935222
|
|
0.82922798 0.61394061 0.66226618 0.91304348]
|
|
|
|
mean value: 0.7467434495515832
|
|
|
|
key: train_mcc
|
|
value: [0.9565329 0.98072211 0.99518069 0.99038439 0.96139753 0.9758568
|
|
0.96619485 0.99518069 0.99038439 0.97594791]
|
|
|
|
mean value: 0.9787782268977475
|
|
|
|
key: test_accuracy
|
|
value: [0.89130435 0.86956522 0.82608696 0.80434783 0.93478261 0.89130435
|
|
0.91304348 0.80434783 0.82608696 0.95652174]
|
|
|
|
mean value: 0.8717391304347826
|
|
|
|
key: train_accuracy
|
|
value: [0.97826087 0.99033816 0.99758454 0.99516908 0.98067633 0.98792271
|
|
0.98309179 0.99758454 0.99516908 0.98792271]
|
|
|
|
mean value: 0.9893719806763285
|
|
|
|
key: test_fscore
|
|
value: [0.88888889 0.875 0.83333333 0.8 0.93333333 0.89795918
|
|
0.91666667 0.81632653 0.80952381 0.95652174]
|
|
|
|
mean value: 0.872755348516218
|
|
|
|
key: train_fscore
|
|
value: [0.97831325 0.99038462 0.99757869 0.99519231 0.98076923 0.98795181
|
|
0.98313253 0.99757869 0.99519231 0.98800959]
|
|
|
|
mean value: 0.989410302921394
|
|
|
|
key: test_precision
|
|
value: [0.90909091 0.84 0.8 0.81818182 0.95454545 0.84615385
|
|
0.88 0.76923077 0.89473684 0.95652174]
|
|
|
|
mean value: 0.8668461378438496
|
|
|
|
key: train_precision
|
|
value: [0.97596154 0.98564593 1. 0.99043062 0.97607656 0.98557692
|
|
0.98076923 1. 0.99043062 0.98095238]
|
|
|
|
mean value: 0.9865843805317489
|
|
|
|
key: test_recall
|
|
value: [0.86956522 0.91304348 0.86956522 0.7826087 0.91304348 0.95652174
|
|
0.95652174 0.86956522 0.73913043 0.95652174]
|
|
|
|
mean value: 0.8826086956521739
|
|
|
|
key: train_recall
|
|
value: [0.98067633 0.99516908 0.99516908 1. 0.98550725 0.99033816
|
|
0.98550725 0.99516908 1. 0.99516908]
|
|
|
|
mean value: 0.9922705314009662
|
|
|
|
key: test_roc_auc
|
|
value: [0.89130435 0.86956522 0.82608696 0.80434783 0.93478261 0.89130435
|
|
0.91304348 0.80434783 0.82608696 0.95652174]
|
|
|
|
mean value: 0.8717391304347826
|
|
|
|
key: train_roc_auc
|
|
value: [0.97826087 0.99033816 0.99758454 0.99516908 0.98067633 0.98792271
|
|
0.98309179 0.99758454 0.99516908 0.98792271]
|
|
|
|
mean value: 0.9893719806763285
|
|
|
|
key: test_jcc
|
|
value: [0.8 0.77777778 0.71428571 0.66666667 0.875 0.81481481
|
|
0.84615385 0.68965517 0.68 0.91666667]
|
|
|
|
mean value: 0.778102065877928
|
|
|
|
key: train_jcc
|
|
value: [0.95754717 0.98095238 0.99516908 0.99043062 0.96226415 0.97619048
|
|
0.96682464 0.99516908 0.99043062 0.97630332]
|
|
|
|
mean value: 0.9791281548253228
|
|
|
|
MCC on Blind test: 0.64
|
|
|
|
Accuracy on Blind test: 0.83
|
|
|
|
Model_name: Bagging Classifier
|
|
Model func: BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
BaggingClassifier(n_jobs=10, oob_score=True,
|
|
random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.06977606 0.07538342 0.0865984 0.08907437 0.09228969 0.07616353
|
|
0.08706713 0.0904603 0.08801723 0.07870221]
|
|
|
|
mean value: 0.08335323333740234
|
|
|
|
key: score_time
|
|
value: [0.02260113 0.02591181 0.03777814 0.04038143 0.02711153 0.02351594
|
|
0.04033351 0.0242157 0.02397299 0.02499533]
|
|
|
|
mean value: 0.02908174991607666
|
|
|
|
key: test_mcc
|
|
value: [0.87038828 0.66226618 0.56521739 0.78935222 0.87038828 0.87038828
|
|
0.82922798 0.56521739 0.61394061 0.95742711]
|
|
|
|
mean value: 0.759381372202104
|
|
|
|
key: train_mcc
|
|
value: [0.97594791 0.9758568 0.98072211 0.98551875 0.96139753 0.98072211
|
|
0.98561076 0.96135266 0.97101449 0.97119583]
|
|
|
|
mean value: 0.974933894100418
|
|
|
|
key: test_accuracy
|
|
value: [0.93478261 0.82608696 0.7826087 0.89130435 0.93478261 0.93478261
|
|
0.91304348 0.7826087 0.80434783 0.97826087]
|
|
|
|
mean value: 0.8782608695652174
|
|
|
|
key: train_accuracy
|
|
value: [0.98792271 0.98792271 0.99033816 0.99275362 0.98067633 0.99033816
|
|
0.99275362 0.98067633 0.98550725 0.98550725]
|
|
|
|
mean value: 0.9874396135265701
|
|
|
|
key: test_fscore
|
|
value: [0.93617021 0.84 0.7826087 0.88372093 0.93333333 0.93617021
|
|
0.91666667 0.7826087 0.79069767 0.97777778]
|
|
|
|
mean value: 0.8779754199265203
|
|
|
|
key: train_fscore
|
|
value: [0.98800959 0.98789346 0.99029126 0.99273608 0.98076923 0.99029126
|
|
0.99270073 0.98067633 0.98550725 0.98536585]
|
|
|
|
mean value: 0.9874241045783559
|
|
|
|
key: test_precision
|
|
value: [0.91666667 0.77777778 0.7826087 0.95 0.95454545 0.91666667
|
|
0.88 0.7826087 0.85 1. ]
|
|
|
|
mean value: 0.8810873956960914
|
|
|
|
key: train_precision
|
|
value: [0.98095238 0.99029126 0.99512195 0.99514563 0.97607656 0.99512195
|
|
1. 0.98067633 0.98550725 0.99507389]
|
|
|
|
mean value: 0.9893967198124055
|
|
|
|
key: test_recall
|
|
value: [0.95652174 0.91304348 0.7826087 0.82608696 0.91304348 0.95652174
|
|
0.95652174 0.7826087 0.73913043 0.95652174]
|
|
|
|
mean value: 0.8782608695652174
|
|
|
|
key: train_recall
|
|
value: [0.99516908 0.98550725 0.98550725 0.99033816 0.98550725 0.98550725
|
|
0.98550725 0.98067633 0.98550725 0.97584541]
|
|
|
|
mean value: 0.9855072463768116
|
|
|
|
key: test_roc_auc
|
|
value: [0.93478261 0.82608696 0.7826087 0.89130435 0.93478261 0.93478261
|
|
0.91304348 0.7826087 0.80434783 0.97826087]
|
|
|
|
mean value: 0.8782608695652174
|
|
|
|
key: train_roc_auc
|
|
value: [0.98792271 0.98792271 0.99033816 0.99275362 0.98067633 0.99033816
|
|
0.99275362 0.98067633 0.98550725 0.98550725]
|
|
|
|
mean value: 0.9874396135265701
|
|
|
|
key: test_jcc
|
|
value: [0.88 0.72413793 0.64285714 0.79166667 0.875 0.88
|
|
0.84615385 0.64285714 0.65384615 0.95652174]
|
|
|
|
mean value: 0.789304062254587
|
|
|
|
key: train_jcc
|
|
value: [0.97630332 0.97607656 0.98076923 0.98557692 0.96226415 0.98076923
|
|
0.98550725 0.96208531 0.97142857 0.97115385]
|
|
|
|
mean value: 0.975193438013435
|
|
|
|
MCC on Blind test: 0.68
|
|
|
|
Accuracy on Blind test: 0.84
|
|
|
|
Model_name: Gaussian Process
|
|
Model func: GaussianProcessClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianProcessClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.12681055 0.12293315 0.11621809 0.12390661 0.1576407 0.11773539
|
|
0.14127946 0.11725688 0.15466142 0.18126297]
|
|
|
|
mean value: 0.13597052097320556
|
|
|
|
key: score_time
|
|
value: [0.02440906 0.01473045 0.02721024 0.02339292 0.02557945 0.02327824
|
|
0.03188467 0.02571344 0.02675581 0.02720642]
|
|
|
|
mean value: 0.025016069412231445
|
|
|
|
key: test_mcc
|
|
value: [ 0.34815531 -0.04637389 0.35082321 0.67556602 0.40533961 0.52623481
|
|
0.30905755 0.22075539 0.30550505 0.3927922 ]
|
|
|
|
mean value: 0.3487855271090109
|
|
|
|
key: train_mcc
|
|
value: [0.97613021 0.98085947 0.98561076 0.97613021 0.96673649 0.97142265
|
|
0.98085947 0.96673649 0.97142265 0.98561076]
|
|
|
|
mean value: 0.9761519176586856
|
|
|
|
key: test_accuracy
|
|
value: [0.67391304 0.47826087 0.67391304 0.82608696 0.69565217 0.76086957
|
|
0.65217391 0.60869565 0.65217391 0.69565217]
|
|
|
|
mean value: 0.6717391304347826
|
|
|
|
key: train_accuracy
|
|
value: [0.98792271 0.99033816 0.99275362 0.98792271 0.98309179 0.98550725
|
|
0.99033816 0.98309179 0.98550725 0.99275362]
|
|
|
|
mean value: 0.9879227053140096
|
|
|
|
key: test_fscore
|
|
value: [0.68085106 0.55555556 0.65116279 0.8 0.73076923 0.74418605
|
|
0.68 0.64 0.63636364 0.70833333]
|
|
|
|
mean value: 0.6827221657060846
|
|
|
|
key: train_fscore
|
|
value: [0.98806683 0.99043062 0.99280576 0.98806683 0.98337292 0.98571429
|
|
0.99043062 0.98337292 0.98571429 0.99280576]
|
|
|
|
mean value: 0.9880780821020794
|
|
|
|
key: test_precision
|
|
value: [0.66666667 0.48387097 0.7 0.94117647 0.65517241 0.8
|
|
0.62962963 0.59259259 0.66666667 0.68 ]
|
|
|
|
mean value: 0.681577540767883
|
|
|
|
key: train_precision
|
|
value: [0.97641509 0.98104265 0.98571429 0.97641509 0.96728972 0.97183099
|
|
0.98104265 0.96728972 0.97183099 0.98571429]
|
|
|
|
mean value: 0.9764585479248011
|
|
|
|
key: test_recall
|
|
value: [0.69565217 0.65217391 0.60869565 0.69565217 0.82608696 0.69565217
|
|
0.73913043 0.69565217 0.60869565 0.73913043]
|
|
|
|
mean value: 0.6956521739130435
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.67391304 0.47826087 0.67391304 0.82608696 0.69565217 0.76086957
|
|
0.65217391 0.60869565 0.65217391 0.69565217]
|
|
|
|
mean value: 0.6717391304347826
|
|
|
|
key: train_roc_auc
|
|
value: [0.98792271 0.99033816 0.99275362 0.98792271 0.98309179 0.98550725
|
|
0.99033816 0.98309179 0.98550725 0.99275362]
|
|
|
|
mean value: 0.9879227053140097
|
|
|
|
key: test_jcc
|
|
value: [0.51612903 0.38461538 0.48275862 0.66666667 0.57575758 0.59259259
|
|
0.51515152 0.47058824 0.46666667 0.5483871 ]
|
|
|
|
mean value: 0.5219313386466432
|
|
|
|
key: train_jcc
|
|
value: [0.97641509 0.98104265 0.98571429 0.97641509 0.96728972 0.97183099
|
|
0.98104265 0.96728972 0.97183099 0.98571429]
|
|
|
|
mean value: 0.9764585479248011
|
|
|
|
MCC on Blind test: 0.32
|
|
|
|
Accuracy on Blind test: 0.66
|
|
|
|
Model_name: Gradient Boosting
|
|
Model func: GradientBoostingClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GradientBoostingClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.65061069 0.63841701 0.63427401 0.63759327 0.63641667 0.64082813
|
|
0.64226913 0.63239646 0.64211464 0.63958859]
|
|
|
|
mean value: 0.6394508600234985
|
|
|
|
key: score_time
|
|
value: [0.01019311 0.00947547 0.00929451 0.00928974 0.00994682 0.00945592
|
|
0.00938773 0.00934958 0.00958204 0.00936604]
|
|
|
|
mean value: 0.009534096717834473
|
|
|
|
key: test_mcc
|
|
value: [0.87038828 0.82922798 0.65465367 0.78935222 0.87038828 0.82922798
|
|
0.87705802 0.61394061 0.73913043 0.95742711]
|
|
|
|
mean value: 0.803079458879572
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.93478261 0.91304348 0.82608696 0.89130435 0.93478261 0.91304348
|
|
0.93478261 0.80434783 0.86956522 0.97826087]
|
|
|
|
mean value: 0.9
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.93617021 0.91666667 0.83333333 0.88372093 0.93333333 0.91666667
|
|
0.93877551 0.81632653 0.86956522 0.97777778]
|
|
|
|
mean value: 0.9022336178983924
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.91666667 0.88 0.8 0.95 0.95454545 0.88
|
|
0.88461538 0.76923077 0.86956522 1. ]
|
|
|
|
mean value: 0.8904623492449579
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.95652174 0.95652174 0.86956522 0.82608696 0.91304348 0.95652174
|
|
1. 0.86956522 0.86956522 0.95652174]
|
|
|
|
mean value: 0.9173913043478261
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.93478261 0.91304348 0.82608696 0.89130435 0.93478261 0.91304348
|
|
0.93478261 0.80434783 0.86956522 0.97826087]
|
|
|
|
mean value: 0.9
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.88 0.84615385 0.71428571 0.79166667 0.875 0.84615385
|
|
0.88461538 0.68965517 0.76923077 0.95652174]
|
|
|
|
mean value: 0.8253283138650455
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.69
|
|
|
|
Accuracy on Blind test: 0.84
|
|
|
|
Model_name: QDA
|
|
Model func: QuadraticDiscriminantAnalysis()
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', QuadraticDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.0364356 0.02996302 0.02851534 0.02908731 0.02904654 0.02905083
|
|
0.02902651 0.02826858 0.02909303 0.02910805]
|
|
|
|
mean value: 0.029759478569030762
|
|
|
|
key: score_time
|
|
value: [0.01263356 0.01271057 0.01395392 0.01488853 0.015131 0.0149157
|
|
0.01497221 0.01506925 0.01482534 0.01487589]
|
|
|
|
mean value: 0.014397597312927246
|
|
|
|
key: test_mcc
|
|
value: [0.22941573 0.15430335 0.31622777 0.12909944 0. 0.10540926
|
|
0.06052275 0.26413527 0.16439899 0.25819889]
|
|
|
|
mean value: 0.1681711452279104
|
|
|
|
key: train_mcc
|
|
value: [0.33601075 0.35786226 0.32249031 0.32249031 0.33154121 0.38316368
|
|
0.34484623 0.33154121 0.35786226 0.33601075]
|
|
|
|
mean value: 0.3423818953924683
|
|
|
|
key: test_accuracy
|
|
value: [0.58695652 0.54347826 0.63043478 0.54347826 0.5 0.54347826
|
|
0.52173913 0.56521739 0.56521739 0.58695652]
|
|
|
|
mean value: 0.558695652173913
|
|
|
|
key: train_accuracy
|
|
value: [0.60144928 0.61352657 0.5942029 0.5942029 0.59903382 0.62801932
|
|
0.60628019 0.59903382 0.61352657 0.60144928]
|
|
|
|
mean value: 0.605072463768116
|
|
|
|
key: test_fscore
|
|
value: [0.68852459 0.67692308 0.71186441 0.66666667 0.64615385 0.6440678
|
|
0.64516129 0.6969697 0.66666667 0.6984127 ]
|
|
|
|
mean value: 0.6741410735668998
|
|
|
|
key: train_fscore
|
|
value: [0.71502591 0.72125436 0.71134021 0.71134021 0.7137931 0.72887324
|
|
0.71750433 0.7137931 0.72125436 0.71502591]
|
|
|
|
mean value: 0.7169204715732834
|
|
|
|
key: test_precision
|
|
value: [0.55263158 0.52380952 0.58333333 0.525 0.5 0.52777778
|
|
0.51282051 0.53488372 0.54054054 0.55 ]
|
|
|
|
mean value: 0.535079698815929
|
|
|
|
key: train_precision
|
|
value: [0.55645161 0.5640327 0.552 0.552 0.55495979 0.5734072
|
|
0.55945946 0.55495979 0.5640327 0.55645161]
|
|
|
|
mean value: 0.5587754853622922
|
|
|
|
key: test_recall
|
|
value: [0.91304348 0.95652174 0.91304348 0.91304348 0.91304348 0.82608696
|
|
0.86956522 1. 0.86956522 0.95652174]
|
|
|
|
mean value: 0.9130434782608695
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.58695652 0.54347826 0.63043478 0.54347826 0.5 0.54347826
|
|
0.52173913 0.56521739 0.56521739 0.58695652]
|
|
|
|
mean value: 0.558695652173913
|
|
|
|
key: train_roc_auc
|
|
value: [0.60144928 0.61352657 0.5942029 0.5942029 0.59903382 0.62801932
|
|
0.60628019 0.59903382 0.61352657 0.60144928]
|
|
|
|
mean value: 0.605072463768116
|
|
|
|
key: test_jcc
|
|
value: [0.525 0.51162791 0.55263158 0.5 0.47727273 0.475
|
|
0.47619048 0.53488372 0.5 0.53658537]
|
|
|
|
mean value: 0.5089191776171207
|
|
|
|
key: train_jcc
|
|
value: [0.55645161 0.5640327 0.552 0.552 0.55495979 0.5734072
|
|
0.55945946 0.55495979 0.5640327 0.55645161]
|
|
|
|
mean value: 0.5587754853622922
|
|
|
|
MCC on Blind test: 0.04
|
|
|
|
Accuracy on Blind test: 0.46
|
|
|
|
Model_name: Ridge Classifier
|
|
Model func: RidgeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02925348 0.03742146 0.03198218 0.05191994 0.03705192 0.0369761
|
|
0.03693676 0.03724885 0.0375576 0.03748274]
|
|
|
|
mean value: 0.037383103370666505
|
|
|
|
key: score_time
|
|
value: [0.02401781 0.02413416 0.02064085 0.0163424 0.02239585 0.02346134
|
|
0.02073121 0.02163601 0.02227831 0.02286959]
|
|
|
|
mean value: 0.02185075283050537
|
|
|
|
key: test_mcc
|
|
value: [0.56736651 0.41736501 0.6092718 0.6092718 0.52223297 0.43519414
|
|
0.57396402 0.62360956 0.6092718 0.74194083]
|
|
|
|
mean value: 0.5709488428748447
|
|
|
|
key: train_mcc
|
|
value: [0.76529696 0.78415661 0.78876611 0.77404053 0.779088 0.78526132
|
|
0.76901382 0.77956276 0.77447567 0.75973177]
|
|
|
|
mean value: 0.7759393536743577
|
|
|
|
key: test_accuracy
|
|
value: [0.7826087 0.69565217 0.80434783 0.80434783 0.76086957 0.7173913
|
|
0.7826087 0.80434783 0.80434783 0.86956522]
|
|
|
|
mean value: 0.782608695652174
|
|
|
|
key: train_accuracy
|
|
value: [0.88164251 0.89130435 0.89371981 0.88647343 0.88888889 0.89130435
|
|
0.88405797 0.88888889 0.88647343 0.87922705]
|
|
|
|
mean value: 0.8871980676328503
|
|
|
|
key: test_fscore
|
|
value: [0.79166667 0.74074074 0.80851064 0.8 0.76595745 0.71111111
|
|
0.8 0.82352941 0.80851064 0.86363636]
|
|
|
|
mean value: 0.7913663017323843
|
|
|
|
key: train_fscore
|
|
value: [0.88578089 0.89461358 0.89671362 0.88941176 0.89201878 0.89559165
|
|
0.88679245 0.89252336 0.88992974 0.88262911]
|
|
|
|
mean value: 0.8906004943009075
|
|
|
|
key: test_precision
|
|
value: [0.76 0.64516129 0.79166667 0.81818182 0.75 0.72727273
|
|
0.74074074 0.75 0.79166667 0.9047619 ]
|
|
|
|
mean value: 0.7679451814613105
|
|
|
|
key: train_precision
|
|
value: [0.85585586 0.86818182 0.87214612 0.86697248 0.86757991 0.86160714
|
|
0.86635945 0.86425339 0.86363636 0.85844749]
|
|
|
|
mean value: 0.8645040014246903
|
|
|
|
key: test_recall
|
|
value: [0.82608696 0.86956522 0.82608696 0.7826087 0.7826087 0.69565217
|
|
0.86956522 0.91304348 0.82608696 0.82608696]
|
|
|
|
mean value: 0.8217391304347826
|
|
|
|
key: train_recall
|
|
value: [0.9178744 0.92270531 0.92270531 0.91304348 0.9178744 0.93236715
|
|
0.90821256 0.92270531 0.9178744 0.90821256]
|
|
|
|
mean value: 0.9183574879227053
|
|
|
|
key: test_roc_auc
|
|
value: [0.7826087 0.69565217 0.80434783 0.80434783 0.76086957 0.7173913
|
|
0.7826087 0.80434783 0.80434783 0.86956522]
|
|
|
|
mean value: 0.782608695652174
|
|
|
|
key: train_roc_auc
|
|
value: [0.88164251 0.89130435 0.89371981 0.88647343 0.88888889 0.89130435
|
|
0.88405797 0.88888889 0.88647343 0.87922705]
|
|
|
|
mean value: 0.8871980676328503
|
|
|
|
key: test_jcc
|
|
value: [0.65517241 0.58823529 0.67857143 0.66666667 0.62068966 0.55172414
|
|
0.66666667 0.7 0.67857143 0.76 ]
|
|
|
|
mean value: 0.6566297691490389
|
|
|
|
key: train_jcc
|
|
value: [0.79497908 0.80932203 0.81276596 0.80084746 0.80508475 0.81092437
|
|
0.79661017 0.80590717 0.80168776 0.78991597]
|
|
|
|
mean value: 0.8028044716567692
|
|
|
|
MCC on Blind test: 0.45
|
|
|
|
Accuracy on Blind test: 0.73
|
|
|
|
Model_name: Ridge ClassifierCV
|
|
Model func: RidgeClassifierCV(cv=10)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifierCV(cv=10))])
|
|
|
|
key: fit_time
|
|
value: [0.25978518 0.2757678 0.26558924 0.27017879 0.34030342 0.40387106
|
|
0.33534956 0.35011554 0.27279735 0.26528931]
|
|
|
|
mean value: 0.30390472412109376
|
|
|
|
key: score_time
|
|
value: [0.02251339 0.02069664 0.0212121 0.02095771 0.02273417 0.02089667
|
|
0.02371645 0.02372837 0.02070165 0.02153325]
|
|
|
|
mean value: 0.02186903953552246
|
|
|
|
key: test_mcc
|
|
value: [0.54772256 0.32461723 0.6092718 0.43519414 0.52223297 0.43519414
|
|
0.57396402 0.62360956 0.6092718 0.74194083]
|
|
|
|
mean value: 0.5423019036532302
|
|
|
|
key: train_mcc
|
|
value: [0.68663964 0.83610009 0.78876611 0.71565259 0.779088 0.78526132
|
|
0.76901382 0.77956276 0.77447567 0.75973177]
|
|
|
|
mean value: 0.7674291764947905
|
|
|
|
key: test_accuracy
|
|
value: [0.76086957 0.65217391 0.80434783 0.7173913 0.76086957 0.7173913
|
|
0.7826087 0.80434783 0.80434783 0.86956522]
|
|
|
|
mean value: 0.7673913043478261
|
|
|
|
key: train_accuracy
|
|
value: [0.84299517 0.9178744 0.89371981 0.85748792 0.88888889 0.89130435
|
|
0.88405797 0.88888889 0.88647343 0.87922705]
|
|
|
|
mean value: 0.8830917874396135
|
|
|
|
key: test_fscore
|
|
value: [0.79245283 0.7037037 0.80851064 0.71111111 0.76595745 0.71111111
|
|
0.8 0.82352941 0.80851064 0.86363636]
|
|
|
|
mean value: 0.778852325491993
|
|
|
|
key: train_fscore
|
|
value: /home/tanu/git/LSHTM_analysis/scripts/ml/./katg_cd_7030.py:176: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
rus_CT.sort_values(by = ['test_mcc'], ascending = False, inplace = True)
|
|
/home/tanu/git/LSHTM_analysis/scripts/ml/./katg_cd_7030.py:179: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
rus_BT.sort_values(by = ['bts_mcc'], ascending = False, inplace = True)
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[0.8463357 0.91904762 0.89671362 0.86052009 0.89201878 0.89559165
|
|
0.88679245 0.89252336 0.88992974 0.88262911]
|
|
|
|
mean value: 0.8862102120393928
|
|
|
|
key: test_precision
|
|
value: [0.7 0.61290323 0.79166667 0.72727273 0.75 0.72727273
|
|
0.74074074 0.75 0.79166667 0.9047619 ]
|
|
|
|
mean value: 0.7496284659187885
|
|
|
|
key: train_precision
|
|
value: [0.8287037 0.90610329 0.87214612 0.84259259 0.86757991 0.86160714
|
|
0.86635945 0.86425339 0.86363636 0.85844749]
|
|
|
|
mean value: 0.8631429445826281
|
|
|
|
key: test_recall
|
|
value: [0.91304348 0.82608696 0.82608696 0.69565217 0.7826087 0.69565217
|
|
0.86956522 0.91304348 0.82608696 0.82608696]
|
|
|
|
mean value: 0.817391304347826
|
|
|
|
key: train_recall
|
|
value: [0.8647343 0.93236715 0.92270531 0.87922705 0.9178744 0.93236715
|
|
0.90821256 0.92270531 0.9178744 0.90821256]
|
|
|
|
mean value: 0.9106280193236715
|
|
|
|
key: test_roc_auc
|
|
value: [0.76086957 0.65217391 0.80434783 0.7173913 0.76086957 0.7173913
|
|
0.7826087 0.80434783 0.80434783 0.86956522]
|
|
|
|
mean value: 0.7673913043478261
|
|
|
|
key: train_roc_auc
|
|
value: [0.84299517 0.9178744 0.89371981 0.85748792 0.88888889 0.89130435
|
|
0.88405797 0.88888889 0.88647343 0.87922705]
|
|
|
|
mean value: 0.8830917874396136
|
|
|
|
key: test_jcc
|
|
value: [0.65625 0.54285714 0.67857143 0.55172414 0.62068966 0.55172414
|
|
0.66666667 0.7 0.67857143 0.76 ]
|
|
|
|
mean value: 0.640705459770115
|
|
|
|
key: train_jcc
|
|
value: [0.73360656 0.85022026 0.81276596 0.75518672 0.80508475 0.81092437
|
|
0.79661017 0.80590717 0.80168776 0.78991597]
|
|
|
|
mean value: 0.7961909689230291
|
|
|
|
MCC on Blind test: 0.45
|
|
|
|
Accuracy on Blind test: 0.73
|
|
|
|
Model_name: Logistic Regression
|
|
Model func: LogisticRegression(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegression(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.05583358 0.03908515 0.05536318 0.1265595 0.0895493 0.03901505
|
|
0.03782654 0.03889012 0.03748798 0.03715825]
|
|
|
|
mean value: 0.05567686557769776
|
|
|
|
key: score_time
|
|
value: [0.01862597 0.01223898 0.01556611 0.01722574 0.01222587 0.01617336
|
|
0.01213646 0.01217461 0.01655126 0.01313186]
|
|
|
|
mean value: 0.014605021476745606
|
|
|
|
key: test_mcc
|
|
value: [0.438357 0.59404013 0.56360186 0.71910121 0.66853948 0.61895161
|
|
0.52419355 0.58770161 0.55544355 0.6385282 ]
|
|
|
|
mean value: 0.5908458209912425
|
|
|
|
key: train_mcc
|
|
value: [0.7266021 0.72302514 0.70930045 0.71271957 0.72152762 0.6778952
|
|
0.71725106 0.70964977 0.72431729 0.73162292]
|
|
|
|
mean value: 0.7153911111468142
|
|
|
|
key: test_accuracy
|
|
value: [0.71875 0.796875 0.78125 0.859375 0.82539683 0.80952381
|
|
0.76190476 0.79365079 0.77777778 0.80952381]
|
|
|
|
mean value: 0.7934027777777778
|
|
|
|
key: train_accuracy
|
|
value: [0.86315789 0.86140351 0.85438596 0.85614035 0.85989492 0.83887916
|
|
0.85814361 0.85464098 0.86164623 0.86514886]
|
|
|
|
mean value: 0.8573441484622238
|
|
|
|
key: test_fscore
|
|
value: [0.70967742 0.8 0.77419355 0.85714286 0.84057971 0.80645161
|
|
0.76190476 0.79365079 0.78125 0.83333333]
|
|
|
|
mean value: 0.7958184036821835
|
|
|
|
key: train_fscore
|
|
value: [0.8650519 0.86308492 0.85714286 0.85862069 0.86486486 0.84083045
|
|
0.86201022 0.8566494 0.86495726 0.86882453]
|
|
|
|
mean value: 0.8602037100062495
|
|
|
|
key: test_precision
|
|
value: [0.73333333 0.78787879 0.8 0.87096774 0.76315789 0.80645161
|
|
0.75 0.80645161 0.78125 0.75 ]
|
|
|
|
mean value: 0.7849490983690899
|
|
|
|
key: train_precision
|
|
value: [0.85324232 0.85273973 0.84121622 0.8440678 0.83660131 0.83219178
|
|
0.84053156 0.84353741 0.84333333 0.84437086]
|
|
|
|
mean value: 0.8431832318372622
|
|
|
|
key: test_recall
|
|
value: [0.6875 0.8125 0.75 0.84375 0.93548387 0.80645161
|
|
0.77419355 0.78125 0.78125 0.9375 ]
|
|
|
|
mean value: 0.8109879032258065
|
|
|
|
key: train_recall
|
|
value: [0.87719298 0.87368421 0.87368421 0.87368421 0.8951049 0.84965035
|
|
0.88461538 0.87017544 0.8877193 0.89473684]
|
|
|
|
mean value: 0.8780247822353086
|
|
|
|
key: test_roc_auc
|
|
value: [0.71875 0.796875 0.78125 0.859375 0.82711694 0.80947581
|
|
0.76209677 0.79385081 0.77772177 0.80745968]
|
|
|
|
mean value: 0.7933971774193548
|
|
|
|
key: train_roc_auc
|
|
value: [0.86315789 0.86140351 0.85438596 0.85614035 0.85983315 0.83886026
|
|
0.85809717 0.85466814 0.86169182 0.86520059]
|
|
|
|
mean value: 0.8573438841859895
|
|
|
|
key: test_jcc
|
|
value: [0.55 0.66666667 0.63157895 0.75 0.725 0.67567568
|
|
0.61538462 0.65789474 0.64102564 0.71428571]
|
|
|
|
mean value: 0.6627511997248839
|
|
|
|
key: train_jcc
|
|
value: [0.76219512 0.75914634 0.75 0.75226586 0.76190476 0.72537313
|
|
0.75748503 0.74924471 0.76204819 0.76807229]
|
|
|
|
mean value: 0.7547735445533712
|
|
|
|
MCC on Blind test: 0.45
|
|
|
|
Accuracy on Blind test: 0.73
|
|
|
|
Model_name: Logistic RegressionCV
|
|
Model func: LogisticRegressionCV(random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegressionCV(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.82782364 0.99994683 0.83809996 0.88531876 1.0512445 0.87437463
|
|
0.99219942 0.85140181 0.94379377 0.99876714]
|
|
|
|
mean value: 0.9262970447540283
|
|
|
|
key: score_time
|
|
value: [0.01321125 0.01522207 0.01573372 0.01344991 0.02312303 0.01547813
|
|
0.01549244 0.01336527 0.01602554 0.01229239]
|
|
|
|
mean value: 0.015339374542236328
|
|
|
|
key: test_mcc
|
|
value: [0.62622429 0.46897905 0.6644106 0.72192954 0.69609023 0.69609023
|
|
0.58770161 0.81644514 0.71443023 0.60087592]
|
|
|
|
mean value: 0.6593176855565691
|
|
|
|
key: train_mcc
|
|
value: [0.84298269 0.86700831 0.85614562 0.88850724 0.87783724 0.8706586
|
|
0.8708281 0.87444928 0.86430436 0.8887091 ]
|
|
|
|
mean value: 0.8701430540050656
|
|
|
|
key: test_accuracy
|
|
value: [0.8125 0.734375 0.828125 0.859375 0.84126984 0.84126984
|
|
0.79365079 0.9047619 0.85714286 0.79365079]
|
|
|
|
mean value: 0.8266121031746032
|
|
|
|
key: train_accuracy
|
|
value: [0.92105263 0.93333333 0.92807018 0.94385965 0.93870403 0.9352014
|
|
0.9352014 0.93695271 0.93169877 0.94395797]
|
|
|
|
mean value: 0.9348032076689096
|
|
|
|
key: test_fscore
|
|
value: [0.81818182 0.73846154 0.81355932 0.86567164 0.85294118 0.85294118
|
|
0.79365079 0.9 0.86153846 0.81690141]
|
|
|
|
mean value: 0.8313847337049435
|
|
|
|
key: train_fscore
|
|
value: [0.92281304 0.93425606 0.92819615 0.94501718 0.93975904 0.93609672
|
|
0.9363167 0.93793103 0.93310463 0.94501718]
|
|
|
|
mean value: 0.935850771843356
|
|
|
|
key: test_precision
|
|
value: [0.79411765 0.72727273 0.88888889 0.82857143 0.78378378 0.78378378
|
|
0.78125 0.96428571 0.84848485 0.74358974]
|
|
|
|
mean value: 0.8144028565719742
|
|
|
|
key: train_precision
|
|
value: [0.90268456 0.92150171 0.92657343 0.92592593 0.92542373 0.92491468
|
|
0.9220339 0.9220339 0.91275168 0.92592593]
|
|
|
|
mean value: 0.9209769427712305
|
|
|
|
key: test_recall
|
|
value: [0.84375 0.75 0.75 0.90625 0.93548387 0.93548387
|
|
0.80645161 0.84375 0.875 0.90625 ]
|
|
|
|
mean value: 0.855241935483871
|
|
|
|
key: train_recall
|
|
value: [0.94385965 0.94736842 0.92982456 0.96491228 0.95454545 0.94755245
|
|
0.95104895 0.95438596 0.95438596 0.96491228]
|
|
|
|
mean value: 0.951279597595387
|
|
|
|
key: test_roc_auc
|
|
value: [0.8125 0.734375 0.828125 0.859375 0.84274194 0.84274194
|
|
0.79385081 0.90574597 0.85685484 0.79183468]
|
|
|
|
mean value: 0.8268145161290322
|
|
|
|
key: train_roc_auc
|
|
value: [0.92105263 0.93333333 0.92807018 0.94385965 0.93867624 0.93517973
|
|
0.9351736 0.93698319 0.93173844 0.9439946 ]
|
|
|
|
mean value: 0.9348061587535271
|
|
|
|
key: test_jcc
|
|
value: [0.69230769 0.58536585 0.68571429 0.76315789 0.74358974 0.74358974
|
|
0.65789474 0.81818182 0.75675676 0.69047619]
|
|
|
|
mean value: 0.7137034715853715
|
|
|
|
key: train_jcc
|
|
value: [0.8566879 0.87662338 0.86601307 0.89576547 0.88636364 0.87987013
|
|
0.8802589 0.88311688 0.87459807 0.89576547]
|
|
|
|
mean value: 0.8795062910999956
|
|
|
|
MCC on Blind test: 0.43
|
|
|
|
Accuracy on Blind test: 0.72
|
|
|
|
Model_name: Gaussian NB
|
|
Model func: GaussianNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01493692 0.01272726 0.01099396 0.01064897 0.01025271 0.0109911
|
|
0.01050425 0.01111269 0.01149487 0.01131725]
|
|
|
|
mean value: 0.011497998237609863
|
|
|
|
key: score_time
|
|
value: [0.01239252 0.01026893 0.00940967 0.00892758 0.00909019 0.00944972
|
|
0.00938582 0.00955534 0.00960016 0.00955343]
|
|
|
|
mean value: 0.009763336181640625
|
|
|
|
key: test_mcc
|
|
value: [0.37573457 0.47082362 0.438357 0.50097943 0.33569416 0.40327957
|
|
0.36491935 0.5570134 0.39656932 0.27016129]
|
|
|
|
mean value: 0.4113531727056431
|
|
|
|
key: train_mcc
|
|
value: [0.48791156 0.48146107 0.49533484 0.4881522 0.48862061 0.48523603
|
|
0.50266258 0.51676791 0.47481655 0.40924848]
|
|
|
|
mean value: 0.48302118291664714
|
|
|
|
key: test_accuracy
|
|
value: [0.6875 0.734375 0.71875 0.75 0.66666667 0.6984127
|
|
0.68253968 0.77777778 0.6984127 0.63492063]
|
|
|
|
mean value: 0.7049355158730158
|
|
|
|
key: train_accuracy
|
|
value: [0.74385965 0.74035088 0.74736842 0.74385965 0.74430823 0.74255692
|
|
0.75131349 0.75831874 0.73730298 0.69527145]
|
|
|
|
mean value: 0.7404510400344118
|
|
|
|
key: test_fscore
|
|
value: [0.67741935 0.72131148 0.70967742 0.75757576 0.67692308 0.71641791
|
|
0.67741935 0.77419355 0.70769231 0.63492063]
|
|
|
|
mean value: 0.7053550840388729
|
|
|
|
key: train_fscore
|
|
value: [0.74740484 0.74744027 0.75342466 0.74914089 0.7456446 0.74611399
|
|
0.75347222 0.76041667 0.74048443 0.64049587]
|
|
|
|
mean value: 0.7384038442996906
|
|
|
|
key: test_precision
|
|
value: [0.7 0.75862069 0.73333333 0.73529412 0.64705882 0.66666667
|
|
0.67741935 0.8 0.6969697 0.64516129]
|
|
|
|
mean value: 0.706052397296263
|
|
|
|
key: train_precision
|
|
value: [0.73720137 0.72757475 0.73578595 0.73400673 0.74305556 0.73720137
|
|
0.74827586 0.75257732 0.73037543 0.77889447]
|
|
|
|
mean value: 0.7424948804585102
|
|
|
|
key: test_recall
|
|
value: [0.65625 0.6875 0.6875 0.78125 0.70967742 0.77419355
|
|
0.67741935 0.75 0.71875 0.625 ]
|
|
|
|
mean value: 0.7067540322580645
|
|
|
|
key: train_recall
|
|
value: [0.75789474 0.76842105 0.77192982 0.76491228 0.74825175 0.75524476
|
|
0.75874126 0.76842105 0.75087719 0.54385965]
|
|
|
|
mean value: 0.7388553551711446
|
|
|
|
key: test_roc_auc
|
|
value: [0.6875 0.734375 0.71875 0.75 0.66733871 0.69959677
|
|
0.68245968 0.77822581 0.69808468 0.63508065]
|
|
|
|
mean value: 0.705141129032258
|
|
|
|
key: train_roc_auc
|
|
value: [0.74385965 0.74035088 0.74736842 0.74385965 0.74430131 0.74253466
|
|
0.75130045 0.7583364 0.73732671 0.69500675]
|
|
|
|
mean value: 0.7404244877929089
|
|
|
|
key: test_jcc
|
|
value: [0.51219512 0.56410256 0.55 0.6097561 0.51162791 0.55813953
|
|
0.51219512 0.63157895 0.54761905 0.46511628]
|
|
|
|
mean value: 0.546233062148368
|
|
|
|
key: train_jcc
|
|
value: [0.59668508 0.59673025 0.6043956 0.5989011 0.59444444 0.59504132
|
|
0.60445682 0.61344538 0.58791209 0.47112462]
|
|
|
|
mean value: 0.5863136708796407
|
|
|
|
MCC on Blind test: 0.42
|
|
|
|
Accuracy on Blind test: 0.71
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01242709 0.01062965 0.01044965 0.01063824 0.01059484 0.01070356
|
|
0.01121044 0.01069164 0.01062679 0.01107025]
|
|
|
|
mean value: 0.010904216766357422
|
|
|
|
key: score_time
|
|
value: [0.00974107 0.00936651 0.00907683 0.00916839 0.00917578 0.00919271
|
|
0.00908542 0.00898957 0.00906658 0.00909472]
|
|
|
|
mean value: 0.009195756912231446
|
|
|
|
key: test_mcc
|
|
value: [0.4113018 0.34391797 0.56694671 0.6011334 0.52679717 0.42986904
|
|
0.39656932 0.42986904 0.40025188 0.38166127]
|
|
|
|
mean value: 0.44883175921777024
|
|
|
|
key: train_mcc
|
|
value: [0.55727849 0.55087719 0.55689066 0.54886043 0.51705741 0.53466669
|
|
0.55324733 0.5320108 0.51729972 0.55982989]
|
|
|
|
mean value: 0.542801862027514
|
|
|
|
key: test_accuracy
|
|
value: [0.703125 0.671875 0.78125 0.796875 0.76190476 0.71428571
|
|
0.6984127 0.71428571 0.6984127 0.68253968]
|
|
|
|
mean value: 0.722296626984127
|
|
|
|
key: train_accuracy
|
|
value: [0.77719298 0.7754386 0.77719298 0.77368421 0.75831874 0.76707531
|
|
0.77583187 0.76532399 0.75831874 0.7793345 ]
|
|
|
|
mean value: 0.7707711924294097
|
|
|
|
key: test_fscore
|
|
value: [0.6779661 0.66666667 0.79411765 0.8115942 0.76923077 0.71875
|
|
0.68852459 0.70967742 0.72463768 0.72972973]
|
|
|
|
mean value: 0.7290894807957649
|
|
|
|
key: train_fscore
|
|
value: [0.78797997 0.7754386 0.78726968 0.78172589 0.76369863 0.77264957
|
|
0.78451178 0.77288136 0.76369863 0.78571429]
|
|
|
|
mean value: 0.7775568392250982
|
|
|
|
key: test_precision
|
|
value: [0.74074074 0.67741935 0.75 0.75675676 0.73529412 0.6969697
|
|
0.7 0.73333333 0.67567568 0.64285714]
|
|
|
|
mean value: 0.7109046818819115
|
|
|
|
key: train_precision
|
|
value: [0.75159236 0.7754386 0.75320513 0.75490196 0.74832215 0.75585284
|
|
0.75649351 0.74754098 0.7458194 0.76237624]
|
|
|
|
mean value: 0.7551543158346077
|
|
|
|
key: test_recall
|
|
value: [0.625 0.65625 0.84375 0.875 0.80645161 0.74193548
|
|
0.67741935 0.6875 0.78125 0.84375 ]
|
|
|
|
mean value: 0.7538306451612903
|
|
|
|
key: train_recall
|
|
value: [0.82807018 0.7754386 0.8245614 0.81052632 0.77972028 0.79020979
|
|
0.81468531 0.8 0.78245614 0.81052632]
|
|
|
|
mean value: 0.8016194331983806
|
|
|
|
key: test_roc_auc
|
|
value: [0.703125 0.671875 0.78125 0.796875 0.76260081 0.71471774
|
|
0.69808468 0.71471774 0.69707661 0.67993952]
|
|
|
|
mean value: 0.7220262096774194
|
|
|
|
key: train_roc_auc
|
|
value: [0.77719298 0.7754386 0.77719298 0.77368421 0.75828119 0.76703472
|
|
0.77576371 0.76538462 0.75836094 0.77938903]
|
|
|
|
mean value: 0.7707722978775611
|
|
|
|
key: test_jcc
|
|
value: [0.51282051 0.5 0.65853659 0.68292683 0.625 0.56097561
|
|
0.525 0.55 0.56818182 0.57446809]
|
|
|
|
mean value: 0.5757909440498958
|
|
|
|
key: train_jcc
|
|
value: [0.65013774 0.63323782 0.64917127 0.64166667 0.61772853 0.62952646
|
|
0.64542936 0.62983425 0.61772853 0.64705882]
|
|
|
|
mean value: 0.63615194674427
|
|
|
|
MCC on Blind test: 0.36
|
|
|
|
Accuracy on Blind test: 0.69
|
|
|
|
Model_name: K-Nearest Neighbors
|
|
Model func: KNeighborsClassifier()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', KNeighborsClassifier())])
|
|
|
|
key: fit_time
|
|
value: [0.00994682 0.01091051 0.00977302 0.00970244 0.00984812 0.0096705
|
|
0.00967932 0.01085186 0.0097518 0.01083469]
|
|
|
|
mean value: 0.010096907615661621
|
|
|
|
key: score_time
|
|
value: [0.01667738 0.01324749 0.01305437 0.01258993 0.01238918 0.01241064
|
|
0.01239824 0.0128274 0.01290751 0.0134058 ]
|
|
|
|
mean value: 0.013190793991088866
|
|
|
|
key: test_mcc
|
|
value: [0.40644851 0.25048972 0.21971769 0.4375 0.40327957 0.14664712
|
|
0.46068548 0.17439516 0.23761484 0.20588616]
|
|
|
|
mean value: 0.2942664249154479
|
|
|
|
key: train_mcc
|
|
value: [0.57138681 0.60711092 0.61328465 0.58881348 0.6061167 0.61431658
|
|
0.61350495 0.61733573 0.60786984 0.59184699]
|
|
|
|
mean value: 0.6031586655181542
|
|
|
|
key: test_accuracy
|
|
value: [0.703125 0.625 0.609375 0.71875 0.6984127 0.57142857
|
|
0.73015873 0.58730159 0.61904762 0.6031746 ]
|
|
|
|
mean value: 0.6465773809523809
|
|
|
|
key: train_accuracy
|
|
value: [0.78421053 0.80175439 0.80526316 0.79298246 0.80210158 0.8056042
|
|
0.8056042 0.80735552 0.80210158 0.79509632]
|
|
|
|
mean value: 0.800207392386395
|
|
|
|
key: test_fscore
|
|
value: [0.6984127 0.63636364 0.62686567 0.71875 0.71641791 0.59701493
|
|
0.73015873 0.59375 0.63636364 0.62686567]
|
|
|
|
mean value: 0.6580962880403178
|
|
|
|
key: train_fscore
|
|
value: [0.79465776 0.81198003 0.81407035 0.80267559 0.81008403 0.81530782
|
|
0.81407035 0.81543624 0.81198003 0.80203046]
|
|
|
|
mean value: 0.8092292670672316
|
|
|
|
key: test_precision
|
|
value: [0.70967742 0.61764706 0.6 0.71875 0.66666667 0.55555556
|
|
0.71875 0.59375 0.61764706 0.6 ]
|
|
|
|
mean value: 0.639844375922412
|
|
|
|
key: train_precision
|
|
value: [0.75796178 0.7721519 0.77884615 0.76677316 0.77993528 0.77777778
|
|
0.78135048 0.78135048 0.7721519 0.7745098 ]
|
|
|
|
mean value: 0.7742808719103773
|
|
|
|
key: test_recall
|
|
value: [0.6875 0.65625 0.65625 0.71875 0.77419355 0.64516129
|
|
0.74193548 0.59375 0.65625 0.65625 ]
|
|
|
|
mean value: 0.6786290322580645
|
|
|
|
key: train_recall
|
|
value: [0.83508772 0.85614035 0.85263158 0.84210526 0.84265734 0.85664336
|
|
0.84965035 0.85263158 0.85614035 0.83157895]
|
|
|
|
mean value: 0.8475266838424733
|
|
|
|
key: test_roc_auc
|
|
value: [0.703125 0.625 0.609375 0.71875 0.69959677 0.57258065
|
|
0.73034274 0.58719758 0.61844758 0.60231855]
|
|
|
|
mean value: 0.6466733870967741
|
|
|
|
key: train_roc_auc
|
|
value: [0.78421053 0.80175439 0.80526316 0.79298246 0.80203043 0.80551466
|
|
0.80552693 0.80743467 0.80219605 0.7951601 ]
|
|
|
|
mean value: 0.800207336523126
|
|
|
|
key: test_jcc
|
|
value: [0.53658537 0.46666667 0.45652174 0.56097561 0.55813953 0.42553191
|
|
0.575 0.42222222 0.46666667 0.45652174]
|
|
|
|
mean value: 0.4924831459203519
|
|
|
|
key: train_jcc
|
|
value: [0.65927978 0.68347339 0.68644068 0.67039106 0.68079096 0.68820225
|
|
0.68644068 0.68838527 0.68347339 0.66949153]
|
|
|
|
mean value: 0.6796368976678084
|
|
|
|
MCC on Blind test: 0.25
|
|
|
|
Accuracy on Blind test: 0.63
|
|
|
|
Model_name: SVM
|
|
Model func: SVC(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SVC(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02824759 0.03029752 0.03228378 0.03044891 0.03006387 0.02939272
|
|
0.03179431 0.03097677 0.02952504 0.03144264]
|
|
|
|
mean value: 0.03044731616973877
|
|
|
|
key: score_time
|
|
value: [0.01378345 0.0138123 0.01368737 0.014395 0.01452589 0.01469469
|
|
0.01457 0.01469707 0.01411414 0.01394987]
|
|
|
|
mean value: 0.014222979545593262
|
|
|
|
key: test_mcc
|
|
value: [0.5625 0.5625 0.5 0.65915306 0.63159952 0.42986904
|
|
0.58728587 0.5570134 0.46146899 0.56449867]
|
|
|
|
mean value: 0.5515888547643149
|
|
|
|
key: train_mcc
|
|
value: [0.70737384 0.67019194 0.70637719 0.66638172 0.69418033 0.69378974
|
|
0.68851772 0.69288686 0.70502882 0.68838037]
|
|
|
|
mean value: 0.6913108540752241
|
|
|
|
key: test_accuracy
|
|
value: [0.78125 0.78125 0.75 0.828125 0.80952381 0.71428571
|
|
0.79365079 0.77777778 0.73015873 0.77777778]
|
|
|
|
mean value: 0.7743799603174604
|
|
|
|
key: train_accuracy
|
|
value: [0.85263158 0.83508772 0.85263158 0.83157895 0.84588441 0.84588441
|
|
0.84238179 0.84588441 0.85113835 0.8441331 ]
|
|
|
|
mean value: 0.8447236304421298
|
|
|
|
key: test_fscore
|
|
value: [0.78125 0.78125 0.75 0.8358209 0.82352941 0.71875
|
|
0.78688525 0.77419355 0.74626866 0.8 ]
|
|
|
|
mean value: 0.7797947758292249
|
|
|
|
key: train_fscore
|
|
value: [0.85810811 0.83566434 0.85665529 0.83946488 0.85234899 0.85185185
|
|
0.85049834 0.84982935 0.85714286 0.84521739]
|
|
|
|
mean value: 0.8496781400811892
|
|
|
|
key: test_precision
|
|
value: [0.78125 0.78125 0.75 0.8 0.75675676 0.6969697
|
|
0.8 0.8 0.71428571 0.73684211]
|
|
|
|
mean value: 0.7617354273275326
|
|
|
|
key: train_precision
|
|
value: [0.82736156 0.83275261 0.83388704 0.80191693 0.81935484 0.82142857
|
|
0.81012658 0.82724252 0.82258065 0.83793103]
|
|
|
|
mean value: 0.8234582349832773
|
|
|
|
key: test_recall
|
|
value: [0.78125 0.78125 0.75 0.875 0.90322581 0.74193548
|
|
0.77419355 0.75 0.78125 0.875 ]
|
|
|
|
mean value: 0.8013104838709677
|
|
|
|
key: train_recall
|
|
value: [0.89122807 0.83859649 0.88070175 0.88070175 0.88811189 0.88461538
|
|
0.8951049 0.87368421 0.89473684 0.85263158]
|
|
|
|
mean value: 0.8780112869586554
|
|
|
|
key: test_roc_auc
|
|
value: [0.78125 0.78125 0.75 0.828125 0.8109879 0.71471774
|
|
0.79334677 0.77822581 0.72933468 0.77620968]
|
|
|
|
mean value: 0.7743447580645161
|
|
|
|
key: train_roc_auc
|
|
value: [0.85263158 0.83508772 0.85263158 0.83157895 0.84581033 0.84581646
|
|
0.84228929 0.84593301 0.85121457 0.84414796]
|
|
|
|
mean value: 0.8447141455036192
|
|
|
|
key: test_jcc
|
|
value: [0.64102564 0.64102564 0.6 0.71794872 0.7 0.56097561
|
|
0.64864865 0.63157895 0.5952381 0.66666667]
|
|
|
|
mean value: 0.6403107967677929
|
|
|
|
key: train_jcc
|
|
value: [0.75147929 0.71771772 0.74925373 0.72334294 0.74269006 0.74193548
|
|
0.73988439 0.7388724 0.75 0.73192771]
|
|
|
|
mean value: 0.7387103728301385
|
|
|
|
MCC on Blind test: 0.41
|
|
|
|
Accuracy on Blind test: 0.71
|
|
|
|
Model_name: MLP
|
|
Model func: MLPClassifier(max_iter=500, random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MLPClassifier(max_iter=500, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.97375131 2.03104806 1.21565342 1.63651228 2.01255965 2.13570857
|
|
2.18256807 2.14106631 2.27216601 2.22363091]
|
|
|
|
mean value: 1.982466459274292
|
|
|
|
key: score_time
|
|
value: [0.01512861 0.01351404 0.0125792 0.01248193 0.01263905 0.01541376
|
|
0.01556706 0.01482749 0.014992 0.01525855]
|
|
|
|
mean value: 0.014240169525146484
|
|
|
|
key: test_mcc
|
|
value: [0.59404013 0.50097943 0.625 0.65915306 0.61445255 0.55544355
|
|
0.65085805 0.62475802 0.69429215 0.58371723]
|
|
|
|
mean value: 0.6102694177898594
|
|
|
|
key: train_mcc
|
|
value: [0.9579891 0.96493604 0.86803566 0.9444412 0.95517197 0.95806136
|
|
0.9719789 0.9650692 0.9650692 0.9581815 ]
|
|
|
|
mean value: 0.950893411855093
|
|
|
|
key: test_accuracy
|
|
value: [0.796875 0.75 0.8125 0.828125 0.79365079 0.77777778
|
|
0.82539683 0.80952381 0.84126984 0.77777778]
|
|
|
|
mean value: 0.8012896825396826
|
|
|
|
key: train_accuracy
|
|
value: [0.97894737 0.98245614 0.93333333 0.97192982 0.97723292 0.97898424
|
|
0.98598949 0.98248687 0.98248687 0.97898424]
|
|
|
|
mean value: 0.9752831290134267
|
|
|
|
key: test_fscore
|
|
value: [0.8 0.74193548 0.8125 0.8358209 0.81690141 0.77419355
|
|
0.81967213 0.8 0.85714286 0.81081081]
|
|
|
|
mean value: 0.8068977135332366
|
|
|
|
key: train_fscore
|
|
value: [0.97909408 0.98239437 0.93515358 0.97241379 0.97770154 0.97916667
|
|
0.98601399 0.9825784 0.9825784 0.97916667]
|
|
|
|
mean value: 0.9756261477085117
|
|
|
|
key: test_precision
|
|
value: [0.78787879 0.76666667 0.8125 0.8 0.725 0.77419355
|
|
0.83333333 0.85714286 0.78947368 0.71428571]
|
|
|
|
mean value: 0.7860474591904982
|
|
|
|
key: train_precision
|
|
value: [0.97231834 0.98586572 0.910299 0.9559322 0.95959596 0.97241379
|
|
0.98601399 0.97577855 0.97577855 0.96907216]
|
|
|
|
mean value: 0.9663068267281514
|
|
|
|
key: test_recall
|
|
value: [0.8125 0.71875 0.8125 0.875 0.93548387 0.77419355
|
|
0.80645161 0.75 0.9375 0.9375 ]
|
|
|
|
mean value: 0.8359879032258064
|
|
|
|
key: train_recall
|
|
value: [0.98596491 0.97894737 0.96140351 0.98947368 0.9965035 0.98601399
|
|
0.98601399 0.98947368 0.98947368 0.98947368]
|
|
|
|
mean value: 0.9852741994847258
|
|
|
|
key: test_roc_auc
|
|
value: [0.796875 0.75 0.8125 0.828125 0.79586694 0.77772177
|
|
0.82510081 0.81048387 0.83971774 0.77520161]
|
|
|
|
mean value: 0.8011592741935484
|
|
|
|
key: train_roc_auc
|
|
value: [0.97894737 0.98245614 0.93333333 0.97192982 0.97719912 0.97897191
|
|
0.98598945 0.98249908 0.98249908 0.97900258]
|
|
|
|
mean value: 0.9752827873880505
|
|
|
|
key: test_jcc
|
|
value: [0.66666667 0.58974359 0.68421053 0.71794872 0.69047619 0.63157895
|
|
0.69444444 0.66666667 0.75 0.68181818]
|
|
|
|
mean value: 0.6773553931448668
|
|
|
|
key: train_jcc
|
|
value: [0.95904437 0.96539792 0.87820513 0.94630872 0.95637584 0.95918367
|
|
0.97241379 0.96575342 0.96575342 0.95918367]
|
|
|
|
mean value: 0.9527619973796925
|
|
|
|
MCC on Blind test: 0.41
|
|
|
|
Accuracy on Blind test: 0.72
|
|
|
|
Model_name: Decision Tree
|
|
Model func: DecisionTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', DecisionTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.04272532 0.03708005 0.03343678 0.0325079 0.0369761 0.03386879
|
|
0.03489923 0.03392696 0.03346109 0.03364778]
|
|
|
|
mean value: 0.03525300025939941
|
|
|
|
key: score_time
|
|
value: [0.0112803 0.00947237 0.00918174 0.00996089 0.0099721 0.00926304
|
|
0.00975418 0.00947094 0.0093956 0.00996923]
|
|
|
|
mean value: 0.009772038459777832
|
|
|
|
key: test_mcc
|
|
value: [0.84748251 0.75592895 0.71910121 0.78163175 0.79833297 0.74596774
|
|
0.80947581 0.74772995 0.75156646 0.52371369]
|
|
|
|
mean value: 0.7480931026695484
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.921875 0.875 0.859375 0.890625 0.88888889 0.87301587
|
|
0.9047619 0.87301587 0.87301587 0.76190476]
|
|
|
|
mean value: 0.8721478174603174
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.92537313 0.86666667 0.85714286 0.88888889 0.89855072 0.87096774
|
|
0.90322581 0.87096774 0.88235294 0.76923077]
|
|
|
|
mean value: 0.8733367272394272
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.88571429 0.92857143 0.87096774 0.90322581 0.81578947 0.87096774
|
|
0.90322581 0.9 0.83333333 0.75757576]
|
|
|
|
mean value: 0.866937137565321
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.96875 0.8125 0.84375 0.875 1. 0.87096774
|
|
0.90322581 0.84375 0.9375 0.78125 ]
|
|
|
|
mean value: 0.8836693548387097
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.921875 0.875 0.859375 0.890625 0.890625 0.87298387
|
|
0.9047379 0.8734879 0.87197581 0.76159274]
|
|
|
|
mean value: 0.8722278225806451
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.86111111 0.76470588 0.75 0.8 0.81578947 0.77142857
|
|
0.82352941 0.77142857 0.78947368 0.625 ]
|
|
|
|
mean value: 0.7772466705980638
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.63
|
|
|
|
Accuracy on Blind test: 0.82
|
|
|
|
Model_name: Extra Trees
|
|
Model func: ExtraTreesClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreesClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.1539166 0.15231609 0.15183663 0.15490413 0.15403032 0.14877081
|
|
0.15526795 0.15582466 0.15328646 0.15435219]
|
|
|
|
mean value: 0.1534505844116211
|
|
|
|
key: score_time
|
|
value: [0.01990318 0.02001739 0.01951146 0.02007031 0.01984763 0.01978254
|
|
0.0202117 0.02009964 0.01998091 0.01975799]
|
|
|
|
mean value: 0.019918274879455567
|
|
|
|
key: test_mcc
|
|
value: [0.71910121 0.5625 0.62622429 0.68884672 0.63159952 0.53159579
|
|
0.61982085 0.68245968 0.55544355 0.56449867]
|
|
|
|
mean value: 0.6182090274709215
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.859375 0.78125 0.8125 0.84375 0.80952381 0.76190476
|
|
0.80952381 0.84126984 0.77777778 0.77777778]
|
|
|
|
mean value: 0.8074652777777778
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.86153846 0.78125 0.81818182 0.84848485 0.82352941 0.7761194
|
|
0.8 0.84375 0.78125 0.8 ]
|
|
|
|
mean value: 0.8134103942954909
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.84848485 0.78125 0.79411765 0.82352941 0.75675676 0.72222222
|
|
0.82758621 0.84375 0.78125 0.73684211]
|
|
|
|
mean value: 0.7915789198447066
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.875 0.78125 0.84375 0.875 0.90322581 0.83870968
|
|
0.77419355 0.84375 0.78125 0.875 ]
|
|
|
|
mean value: 0.8391129032258065
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.859375 0.78125 0.8125 0.84375 0.8109879 0.76310484
|
|
0.80897177 0.84122984 0.77772177 0.77620968]
|
|
|
|
mean value: 0.8075100806451613
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.75675676 0.64102564 0.69230769 0.73684211 0.7 0.63414634
|
|
0.66666667 0.72972973 0.64102564 0.66666667]
|
|
|
|
mean value: 0.6865167240905367
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.4
|
|
|
|
Accuracy on Blind test: 0.71
|
|
|
|
Model_name: Extra Tree
|
|
Model func: ExtraTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01775193 0.01068568 0.01076674 0.01071692 0.01126504 0.01195621
|
|
0.01207042 0.01220584 0.01247931 0.01210117]
|
|
|
|
mean value: 0.012199926376342773
|
|
|
|
key: score_time
|
|
value: [0.00915742 0.00898027 0.00895357 0.00890779 0.00897694 0.00968862
|
|
0.00974393 0.00967693 0.00982428 0.00974512]
|
|
|
|
mean value: 0.009365487098693847
|
|
|
|
key: test_mcc
|
|
value: [0.4375 0.56694671 0.50097943 0.53150959 0.5026181 0.68415777
|
|
0.39717742 0.56086231 0.46309616 0.35186681]
|
|
|
|
mean value: 0.4996714294281916
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.71875 0.78125 0.75 0.765625 0.74603175 0.84126984
|
|
0.6984127 0.77777778 0.73015873 0.66666667]
|
|
|
|
mean value: 0.7475942460317461
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.71875 0.76666667 0.75757576 0.76190476 0.76470588 0.84375
|
|
0.6984127 0.76666667 0.72131148 0.72 ]
|
|
|
|
mean value: 0.7519743908989328
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.71875 0.82142857 0.73529412 0.77419355 0.7027027 0.81818182
|
|
0.6875 0.82142857 0.75862069 0.62790698]
|
|
|
|
mean value: 0.7466006996175177
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.71875 0.71875 0.78125 0.75 0.83870968 0.87096774
|
|
0.70967742 0.71875 0.6875 0.84375 ]
|
|
|
|
mean value: 0.7638104838709677
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.71875 0.78125 0.75 0.765625 0.74747984 0.84173387
|
|
0.69858871 0.77872984 0.73084677 0.66381048]
|
|
|
|
mean value: 0.7476814516129032
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.56097561 0.62162162 0.6097561 0.61538462 0.61904762 0.72972973
|
|
0.53658537 0.62162162 0.56410256 0.5625 ]
|
|
|
|
mean value: 0.6041324844678503
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.23
|
|
|
|
Accuracy on Blind test: 0.62
|
|
|
|
Model_name: Random Forest
|
|
Model func: RandomForestClassifier(n_estimators=1000, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(n_estimators=1000, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [2.1599102 2.15545416 2.1110487 2.12321568 2.14602542 2.12714887
|
|
2.07959056 2.1073513 2.10407662 2.11057091]
|
|
|
|
mean value: 2.122439241409302
|
|
|
|
key: score_time
|
|
value: [0.10290813 0.10166049 0.09488583 0.10192132 0.10095882 0.09461808
|
|
0.09486747 0.09576178 0.14553905 0.09514618]
|
|
|
|
mean value: 0.10282671451568604
|
|
|
|
key: test_mcc
|
|
value: [0.93933644 0.75146915 0.78163175 0.90669283 0.82507166 0.72407013
|
|
0.78160117 0.90524194 0.77800241 0.78719616]
|
|
|
|
mean value: 0.8180313634790382
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.96875 0.875 0.890625 0.953125 0.9047619 0.85714286
|
|
0.88888889 0.95238095 0.88888889 0.88888889]
|
|
|
|
mean value: 0.9068452380952381
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.96969697 0.87878788 0.88888889 0.95238095 0.91176471 0.86567164
|
|
0.89230769 0.95238095 0.89230769 0.89855072]
|
|
|
|
mean value: 0.9102738099062105
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.94117647 0.85294118 0.90322581 0.96774194 0.83783784 0.80555556
|
|
0.85294118 0.96774194 0.87878788 0.83783784]
|
|
|
|
mean value: 0.8845787610967877
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 0.90625 0.875 0.9375 1. 0.93548387
|
|
0.93548387 0.9375 0.90625 0.96875 ]
|
|
|
|
mean value: 0.9402217741935484
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.96875 0.875 0.890625 0.953125 0.90625 0.85836694
|
|
0.88961694 0.95262097 0.88860887 0.88760081]
|
|
|
|
mean value: 0.9070564516129033
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.94117647 0.78378378 0.8 0.90909091 0.83783784 0.76315789
|
|
0.80555556 0.90909091 0.80555556 0.81578947]
|
|
|
|
mean value: 0.8371038389923839
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.67
|
|
|
|
Accuracy on Blind test: 0.84
|
|
|
|
Model_name: Random Forest2
|
|
Model func: RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000...05', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10,
|
|
oob_score=True, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.00224113 1.03473115 1.0149684 1.03896141 1.01002407 1.00975657
|
|
1.02722478 1.11543322 1.08370209 1.00851583]
|
|
|
|
mean value: 1.0345558643341064
|
|
|
|
key: score_time
|
|
value: [0.28700686 0.21716261 0.2733736 0.23618817 0.25626564 0.19256282
|
|
0.22805929 0.24182963 0.19579506 0.23420334]
|
|
|
|
mean value: 0.23624470233917236
|
|
|
|
key: test_mcc
|
|
value: [0.90669283 0.72192954 0.75146915 0.84748251 0.78822824 0.72407013
|
|
0.81130213 0.84173387 0.77800241 0.75156646]
|
|
|
|
mean value: 0.7922477281849055
|
|
|
|
key: train_mcc
|
|
value: [0.94133067 0.9340293 0.9233756 0.93039747 0.93777673 0.94115006
|
|
0.92690126 0.92690929 0.93052245 0.93031595]
|
|
|
|
mean value: 0.932270878516129
|
|
|
|
key: test_accuracy
|
|
value: [0.953125 0.859375 0.875 0.921875 0.88888889 0.85714286
|
|
0.9047619 0.92063492 0.88888889 0.87301587]
|
|
|
|
mean value: 0.8942708333333333
|
|
|
|
key: train_accuracy
|
|
value: [0.97017544 0.96666667 0.96140351 0.96491228 0.96847636 0.97022767
|
|
0.96322242 0.96322242 0.96497373 0.96497373]
|
|
|
|
mean value: 0.9658254216978523
|
|
|
|
key: test_fscore
|
|
value: [0.95384615 0.86567164 0.87096774 0.91803279 0.89552239 0.86567164
|
|
0.90625 0.92063492 0.89230769 0.88235294]
|
|
|
|
mean value: 0.8971257908427758
|
|
|
|
key: train_fscore
|
|
value: [0.97084048 0.96729776 0.96206897 0.96551724 0.96917808 0.97084048
|
|
0.96385542 0.96373057 0.96551724 0.96539792]
|
|
|
|
mean value: 0.9664244169005379
|
|
|
|
key: test_precision
|
|
value: [0.93939394 0.82857143 0.9 0.96551724 0.83333333 0.80555556
|
|
0.87878788 0.93548387 0.87878788 0.83333333]
|
|
|
|
mean value: 0.87987644601104
|
|
|
|
key: train_precision
|
|
value: [0.94966443 0.94932432 0.94576271 0.94915254 0.94966443 0.95286195
|
|
0.94915254 0.94897959 0.94915254 0.95221843]
|
|
|
|
mean value: 0.9495933497100595
|
|
|
|
key: test_recall
|
|
value: [0.96875 0.90625 0.84375 0.875 0.96774194 0.93548387
|
|
0.93548387 0.90625 0.90625 0.9375 ]
|
|
|
|
mean value: 0.9182459677419355
|
|
|
|
key: train_recall
|
|
value: [0.99298246 0.98596491 0.97894737 0.98245614 0.98951049 0.98951049
|
|
0.97902098 0.97894737 0.98245614 0.97894737]
|
|
|
|
mean value: 0.9838743712427923
|
|
|
|
key: test_roc_auc
|
|
value: [0.953125 0.859375 0.875 0.921875 0.89012097 0.85836694
|
|
0.90524194 0.92086694 0.88860887 0.87197581]
|
|
|
|
mean value: 0.8944556451612904
|
|
|
|
key: train_roc_auc
|
|
value: [0.97017544 0.96666667 0.96140351 0.96491228 0.96843946 0.97019384
|
|
0.9631947 0.96324991 0.96500429 0.96499816]
|
|
|
|
mean value: 0.9658238252975095
|
|
|
|
key: test_jcc
|
|
value: [0.91176471 0.76315789 0.77142857 0.84848485 0.81081081 0.76315789
|
|
0.82857143 0.85294118 0.80555556 0.78947368]
|
|
|
|
mean value: 0.8145346570888367
|
|
|
|
key: train_jcc
|
|
value: [0.94333333 0.93666667 0.9269103 0.93333333 0.94019934 0.94333333
|
|
0.93023256 0.93 0.93333333 0.93311037]
|
|
|
|
mean value: 0.9350452560584006
|
|
|
|
MCC on Blind test: 0.69
|
|
|
|
Accuracy on Blind test: 0.85
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.02679944 0.01057577 0.01054859 0.01065159 0.01063442 0.01070833
|
|
0.01059222 0.0106802 0.01059055 0.01068306]
|
|
|
|
mean value: 0.012246417999267577
|
|
|
|
key: score_time
|
|
value: [0.00978088 0.0090704 0.01042318 0.00912976 0.00906348 0.00907946
|
|
0.00904584 0.00904346 0.00901008 0.0090301 ]
|
|
|
|
mean value: 0.009267663955688477
|
|
|
|
key: test_mcc
|
|
value: [0.4113018 0.34391797 0.56694671 0.6011334 0.52679717 0.42986904
|
|
0.39656932 0.42986904 0.40025188 0.38166127]
|
|
|
|
mean value: 0.44883175921777024
|
|
|
|
key: train_mcc
|
|
value: [0.55727849 0.55087719 0.55689066 0.54886043 0.51705741 0.53466669
|
|
0.55324733 0.5320108 0.51729972 0.55982989]
|
|
|
|
mean value: 0.542801862027514
|
|
|
|
key: test_accuracy
|
|
value: [0.703125 0.671875 0.78125 0.796875 0.76190476 0.71428571
|
|
0.6984127 0.71428571 0.6984127 0.68253968]
|
|
|
|
mean value: 0.722296626984127
|
|
|
|
key: train_accuracy
|
|
value: [0.77719298 0.7754386 0.77719298 0.77368421 0.75831874 0.76707531
|
|
0.77583187 0.76532399 0.75831874 0.7793345 ]
|
|
|
|
mean value: 0.7707711924294097
|
|
|
|
key: test_fscore
|
|
value: [0.6779661 0.66666667 0.79411765 0.8115942 0.76923077 0.71875
|
|
0.68852459 0.70967742 0.72463768 0.72972973]
|
|
|
|
mean value: 0.7290894807957649
|
|
|
|
key: train_fscore
|
|
value: [0.78797997 0.7754386 0.78726968 0.78172589 0.76369863 0.77264957
|
|
0.78451178 0.77288136 0.76369863 0.78571429]
|
|
|
|
mean value: 0.7775568392250982
|
|
|
|
key: test_precision
|
|
value: [0.74074074 0.67741935 0.75 0.75675676 0.73529412 0.6969697
|
|
0.7 0.73333333 0.67567568 0.64285714]
|
|
|
|
mean value: 0.7109046818819115
|
|
|
|
key: train_precision
|
|
value: [0.75159236 0.7754386 0.75320513 0.75490196 0.74832215 0.75585284
|
|
0.75649351 0.74754098 0.7458194 0.76237624]
|
|
|
|
mean value: 0.7551543158346077
|
|
|
|
key: test_recall
|
|
value: [0.625 0.65625 0.84375 0.875 0.80645161 0.74193548
|
|
0.67741935 0.6875 0.78125 0.84375 ]
|
|
|
|
mean value: 0.7538306451612903
|
|
|
|
key: train_recall
|
|
value: [0.82807018 0.7754386 0.8245614 0.81052632 0.77972028 0.79020979
|
|
0.81468531 0.8 0.78245614 0.81052632]
|
|
|
|
mean value: 0.8016194331983806
|
|
|
|
key: test_roc_auc
|
|
value: [0.703125 0.671875 0.78125 0.796875 0.76260081 0.71471774
|
|
0.69808468 0.71471774 0.69707661 0.67993952]
|
|
|
|
mean value: 0.7220262096774194
|
|
|
|
key: train_roc_auc
|
|
value: [0.77719298 0.7754386 0.77719298 0.77368421 0.75828119 0.76703472
|
|
0.77576371 0.76538462 0.75836094 0.77938903]
|
|
|
|
mean value: 0.7707722978775611
|
|
|
|
key: test_jcc
|
|
value: [0.51282051 0.5 0.65853659 0.68292683 0.625 0.56097561
|
|
0.525 0.55 0.56818182 0.57446809]
|
|
|
|
mean value: 0.5757909440498958
|
|
|
|
key: train_jcc
|
|
value: [0.65013774 0.63323782 0.64917127 0.64166667 0.61772853 0.62952646
|
|
0.64542936 0.62983425 0.61772853 0.64705882]
|
|
|
|
mean value: 0.63615194674427
|
|
|
|
MCC on Blind test: 0.36
|
|
|
|
Accuracy on Blind test: 0.69
|
|
|
|
Model_name: XGBoost
|
|
Model func: XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000...
|
|
interaction_constraints=None, learning_rate=None,
|
|
max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan,
|
|
monotone_constraints=None, n_estimators=100,
|
|
n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None,
|
|
reg_lambda=None, scale_pos_weight=None,
|
|
subsample=None, tree_method=None,
|
|
use_label_encoder=False,
|
|
validate_parameters=None, verbosity=0))])
|
|
|
|
key: fit_time
|
|
value: [0.1093576 0.09363127 0.09794664 0.09333253 0.24359059 0.09687424
|
|
0.09973836 0.10169721 0.12564659 0.0984056 ]
|
|
|
|
mean value: 0.11602206230163574
|
|
|
|
key: score_time
|
|
value: [0.01119781 0.01130223 0.01117301 0.01125622 0.01114964 0.01119208
|
|
0.01138306 0.01121926 0.01116014 0.01131511]
|
|
|
|
mean value: 0.011234855651855469
|
|
|
|
key: test_mcc
|
|
value: [0.93933644 0.84416229 0.78470603 0.8125 0.84530217 0.81130213
|
|
0.84530217 0.96871896 0.84173387 0.74722285]
|
|
|
|
mean value: 0.8440286901864472
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.96875 0.921875 0.890625 0.90625 0.92063492 0.9047619
|
|
0.92063492 0.98412698 0.92063492 0.87301587]
|
|
|
|
mean value: 0.9211309523809523
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.96969697 0.92063492 0.8852459 0.90625 0.92307692 0.90625
|
|
0.92307692 0.98461538 0.92063492 0.87878788]
|
|
|
|
mean value: 0.9218269822163264
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.94117647 0.93548387 0.93103448 0.90625 0.88235294 0.87878788
|
|
0.88235294 0.96969697 0.93548387 0.85294118]
|
|
|
|
mean value: 0.9115560602590718
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 0.90625 0.84375 0.90625 0.96774194 0.93548387
|
|
0.96774194 1. 0.90625 0.90625 ]
|
|
|
|
mean value: 0.9339717741935484
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.96875 0.921875 0.890625 0.90625 0.92137097 0.90524194
|
|
0.92137097 0.98387097 0.92086694 0.87247984]
|
|
|
|
mean value: 0.9212701612903226
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.94117647 0.85294118 0.79411765 0.82857143 0.85714286 0.82857143
|
|
0.85714286 0.96969697 0.85294118 0.78378378]
|
|
|
|
mean value: 0.856608579549756
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.74
|
|
|
|
Accuracy on Blind test: 0.87
|
|
|
|
Model_name: LDA
|
|
Model func: LinearDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LinearDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.0467267 0.1044414 0.06506896 0.09787321 0.08672071 0.05564761
|
|
0.08345127 0.05584645 0.08797264 0.09393358]
|
|
|
|
mean value: 0.07776825428009033
|
|
|
|
key: score_time
|
|
value: [0.01902127 0.02189255 0.01270485 0.02597713 0.03544736 0.01601791
|
|
0.01896071 0.01225686 0.02112865 0.01939201]
|
|
|
|
mean value: 0.020279932022094726
|
|
|
|
key: test_mcc
|
|
value: [0.53150959 0.40644851 0.62994079 0.75 0.60364273 0.62475802
|
|
0.52419355 0.61895161 0.68740835 0.62469891]
|
|
|
|
mean value: 0.6001552066732136
|
|
|
|
key: train_mcc
|
|
value: [0.80059654 0.83258409 0.83571043 0.81585011 0.82950084 0.83562019
|
|
0.82558492 0.81989894 0.79756669 0.82986082]
|
|
|
|
mean value: 0.8222773553965407
|
|
|
|
key: test_accuracy
|
|
value: [0.765625 0.703125 0.8125 0.875 0.79365079 0.80952381
|
|
0.76190476 0.80952381 0.84126984 0.79365079]
|
|
|
|
mean value: 0.796577380952381
|
|
|
|
key: train_accuracy
|
|
value: [0.9 0.91578947 0.91754386 0.90701754 0.91418564 0.91768827
|
|
0.91243433 0.9089317 0.89842382 0.91418564]
|
|
|
|
mean value: 0.9106200264233263
|
|
|
|
key: test_fscore
|
|
value: [0.76190476 0.70769231 0.8 0.875 0.8115942 0.81818182
|
|
0.76190476 0.8125 0.85294118 0.82666667]
|
|
|
|
mean value: 0.8028385695719455
|
|
|
|
key: train_fscore
|
|
value: [0.90189329 0.91780822 0.91910499 0.91001698 0.9165247 0.91882556
|
|
0.91438356 0.91186441 0.90034364 0.9165247 ]
|
|
|
|
mean value: 0.9127290052032038
|
|
|
|
key: test_precision
|
|
value: [0.77419355 0.6969697 0.85714286 0.875 0.73684211 0.77142857
|
|
0.75 0.8125 0.80555556 0.72093023]
|
|
|
|
mean value: 0.7800562567305075
|
|
|
|
key: train_precision
|
|
value: [0.88513514 0.89632107 0.90202703 0.88157895 0.89368771 0.90784983
|
|
0.89597315 0.88196721 0.88215488 0.89072848]
|
|
|
|
mean value: 0.8917423443210672
|
|
|
|
key: test_recall
|
|
value: [0.75 0.71875 0.75 0.875 0.90322581 0.87096774
|
|
0.77419355 0.8125 0.90625 0.96875 ]
|
|
|
|
mean value: 0.8329637096774194
|
|
|
|
key: train_recall
|
|
value: [0.91929825 0.94035088 0.93684211 0.94035088 0.94055944 0.93006993
|
|
0.93356643 0.94385965 0.91929825 0.94385965]
|
|
|
|
mean value: 0.9348055453318611
|
|
|
|
key: test_roc_auc
|
|
value: [0.765625 0.703125 0.8125 0.875 0.7953629 0.81048387
|
|
0.76209677 0.80947581 0.84022177 0.79082661]
|
|
|
|
mean value: 0.7964717741935484
|
|
|
|
key: train_roc_auc
|
|
value: [0.9 0.91578947 0.91754386 0.90701754 0.91413937 0.91766654
|
|
0.91239725 0.90899276 0.89846031 0.91423752]
|
|
|
|
mean value: 0.9106244632560422
|
|
|
|
key: test_jcc
|
|
value: [0.61538462 0.54761905 0.66666667 0.77777778 0.68292683 0.69230769
|
|
0.61538462 0.68421053 0.74358974 0.70454545]
|
|
|
|
mean value: 0.6730412968859696
|
|
|
|
key: train_jcc
|
|
value: [0.82131661 0.84810127 0.85031847 0.83489097 0.84591195 0.84984026
|
|
0.84227129 0.83800623 0.81875 0.84591195]
|
|
|
|
mean value: 0.8395318996179627
|
|
|
|
MCC on Blind test: 0.45
|
|
|
|
Accuracy on Blind test: 0.73
|
|
|
|
Model_name: Multinomial
|
|
Model func: MultinomialNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MultinomialNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01548767 0.01054072 0.01008558 0.0100286 0.01017189 0.01033568
|
|
0.01030731 0.01033974 0.01020432 0.01021051]
|
|
|
|
mean value: 0.01077120304107666
|
|
|
|
key: score_time
|
|
value: [0.01049852 0.00919342 0.00898242 0.00896382 0.00888681 0.00889635
|
|
0.00888109 0.00895309 0.00899196 0.0088768 ]
|
|
|
|
mean value: 0.00911242961883545
|
|
|
|
key: test_mcc
|
|
value: [0.40644851 0.5336001 0.46897905 0.56360186 0.5485062 0.39656932
|
|
0.58770161 0.61982085 0.50663549 0.58371723]
|
|
|
|
mean value: 0.5215580225467669
|
|
|
|
key: train_mcc
|
|
value: [0.57766536 0.54579833 0.54401001 0.51829689 0.54017741 0.56212452
|
|
0.54056044 0.5519492 0.56071673 0.55034238]
|
|
|
|
mean value: 0.5491641282086239
|
|
|
|
key: test_accuracy
|
|
value: [0.703125 0.765625 0.734375 0.78125 0.76190476 0.6984127
|
|
0.79365079 0.80952381 0.74603175 0.77777778]
|
|
|
|
mean value: 0.7571676587301587
|
|
|
|
key: train_accuracy
|
|
value: [0.7877193 0.77192982 0.77017544 0.75789474 0.76882662 0.7793345
|
|
0.76882662 0.77408056 0.7793345 0.77408056]
|
|
|
|
mean value: 0.7732202660767505
|
|
|
|
key: test_fscore
|
|
value: [0.6984127 0.7761194 0.73846154 0.78787879 0.78873239 0.68852459
|
|
0.79365079 0.81818182 0.77777778 0.81081081]
|
|
|
|
mean value: 0.7678550612689431
|
|
|
|
key: train_fscore
|
|
value: [0.79663866 0.78114478 0.7827529 0.76923077 0.78 0.79139073
|
|
0.7807309 0.78606965 0.78787879 0.78319328]
|
|
|
|
mean value: 0.7839030450411416
|
|
|
|
key: test_precision
|
|
value: [0.70967742 0.74285714 0.72727273 0.76470588 0.7 0.7
|
|
0.78125 0.79411765 0.7 0.71428571]
|
|
|
|
mean value: 0.7334166533182188
|
|
|
|
key: train_precision
|
|
value: [0.76451613 0.75080906 0.74213836 0.73482428 0.74522293 0.75157233
|
|
0.74367089 0.74528302 0.75728155 0.7516129 ]
|
|
|
|
mean value: 0.7486931454999034
|
|
|
|
key: test_recall
|
|
value: [0.6875 0.8125 0.75 0.8125 0.90322581 0.67741935
|
|
0.80645161 0.84375 0.875 0.9375 ]
|
|
|
|
mean value: 0.8105846774193548
|
|
|
|
key: train_recall
|
|
value: [0.83157895 0.81403509 0.82807018 0.80701754 0.81818182 0.83566434
|
|
0.82167832 0.83157895 0.82105263 0.81754386]
|
|
|
|
mean value: 0.8226401668506932
|
|
|
|
key: test_roc_auc
|
|
value: [0.703125 0.765625 0.734375 0.78125 0.7641129 0.69808468
|
|
0.79385081 0.80897177 0.74395161 0.77520161]
|
|
|
|
mean value: 0.7568548387096774
|
|
|
|
key: train_roc_auc
|
|
value: [0.7877193 0.77192982 0.77017544 0.75789474 0.76874003 0.77923568
|
|
0.7687339 0.77418108 0.77940743 0.77415655]
|
|
|
|
mean value: 0.7732173966384492
|
|
|
|
key: test_jcc
|
|
value: [0.53658537 0.63414634 0.58536585 0.65 0.65116279 0.525
|
|
0.65789474 0.69230769 0.63636364 0.68181818]
|
|
|
|
mean value: 0.62506445990049
|
|
|
|
key: train_jcc
|
|
value: [0.66201117 0.64088398 0.64305177 0.625 0.63934426 0.65479452
|
|
0.64032698 0.64754098 0.65 0.64364641]
|
|
|
|
mean value: 0.6446600072968279
|
|
|
|
MCC on Blind test: 0.42
|
|
|
|
Accuracy on Blind test: 0.71
|
|
|
|
Model_name: Passive Aggresive
|
|
Model func: PassiveAggressiveClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
PassiveAggressiveClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.0133729 0.02345967 0.01946425 0.02447844 0.02136731 0.02070236
|
|
0.02127075 0.02006936 0.02137899 0.02137303]
|
|
|
|
mean value: 0.020693707466125488
|
|
|
|
key: score_time
|
|
value: [0.01019573 0.01148057 0.01190829 0.01195073 0.01189804 0.01192021
|
|
0.01198316 0.01190925 0.01193452 0.01192427]
|
|
|
|
mean value: 0.011710476875305176
|
|
|
|
key: test_mcc
|
|
value: [0.50395263 0.48038446 0.43033148 0.6644106 0.52371369 0.42753131
|
|
0.33112209 0.49391458 0.125 0.43960456]
|
|
|
|
mean value: 0.44199654030263313
|
|
|
|
key: train_mcc
|
|
value: [0.71010764 0.67515502 0.39816653 0.68967448 0.70150161 0.41409554
|
|
0.44601782 0.62481351 0.17548103 0.72805889]
|
|
|
|
mean value: 0.5563072070052836
|
|
|
|
key: test_accuracy
|
|
value: [0.75 0.734375 0.65625 0.828125 0.76190476 0.65079365
|
|
0.63492063 0.73015873 0.50793651 0.68253968]
|
|
|
|
mean value: 0.6937003968253969
|
|
|
|
key: train_accuracy
|
|
value: [0.84385965 0.82631579 0.63684211 0.83333333 0.8441331 0.65148862
|
|
0.67950963 0.80210158 0.53064799 0.84938704]
|
|
|
|
mean value: 0.7497618828156205
|
|
|
|
key: test_fscore
|
|
value: [0.76470588 0.70175439 0.74418605 0.84057971 0.75409836 0.73809524
|
|
0.46511628 0.67924528 0.06060606 0.75609756]
|
|
|
|
mean value: 0.650448480739569
|
|
|
|
key: train_fscore
|
|
value: [0.86115445 0.80080483 0.73359073 0.85225505 0.827853 0.74054759
|
|
0.54590571 0.77263581 0.11258278 0.86769231]
|
|
|
|
mean value: 0.7115022260480378
|
|
|
|
key: test_precision
|
|
value: [0.72222222 0.8 0.59259259 0.78378378 0.76666667 0.58490566
|
|
0.83333333 0.85714286 1. 0.62 ]
|
|
|
|
mean value: 0.7560647116118814
|
|
|
|
key: train_precision
|
|
value: [0.7752809 0.93867925 0.57926829 0.76536313 0.92640693 0.59043659
|
|
0.94017094 0.90566038 1. 0.77260274]
|
|
|
|
mean value: 0.8193869139432945
|
|
|
|
key: test_recall
|
|
value: [0.8125 0.625 1. 0.90625 0.74193548 1.
|
|
0.32258065 0.5625 0.03125 0.96875 ]
|
|
|
|
mean value: 0.6970766129032258
|
|
|
|
key: train_recall
|
|
value: [0.96842105 0.69824561 1. 0.96140351 0.74825175 0.99300699
|
|
0.38461538 0.67368421 0.05964912 0.98947368]
|
|
|
|
mean value: 0.7476751318856582
|
|
|
|
key: test_roc_auc
|
|
value: [0.75 0.734375 0.65625 0.828125 0.76159274 0.65625
|
|
0.63004032 0.7328629 0.515625 0.67792339]
|
|
|
|
mean value: 0.694304435483871
|
|
|
|
key: train_roc_auc
|
|
value: [0.84385965 0.82631579 0.63684211 0.83333333 0.84430131 0.65088946
|
|
0.68002699 0.80187707 0.52982456 0.84963195]
|
|
|
|
mean value: 0.7496902220586431
|
|
|
|
key: test_jcc
|
|
value: [0.61904762 0.54054054 0.59259259 0.725 0.60526316 0.58490566
|
|
0.3030303 0.51428571 0.03125 0.60784314]
|
|
|
|
mean value: 0.5123758725023767
|
|
|
|
key: train_jcc
|
|
value: [0.75616438 0.66778523 0.57926829 0.74254743 0.70627063 0.58799172
|
|
0.37542662 0.6295082 0.05964912 0.76630435]
|
|
|
|
mean value: 0.5870915970622187
|
|
|
|
MCC on Blind test: 0.42
|
|
|
|
Accuracy on Blind test: 0.72
|
|
|
|
Model_name: Stochastic GDescent
|
|
Model func: SGDClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SGDClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02321935 0.03090262 0.02621317 0.02358508 0.0311234 0.02431655
|
|
0.02363515 0.0268414 0.02199745 0.0251801 ]
|
|
|
|
mean value: 0.025701427459716798
|
|
|
|
key: score_time
|
|
value: [0.01190567 0.01201439 0.01202464 0.01195478 0.01194811 0.01680708
|
|
0.01229119 0.01195621 0.01197958 0.01200938]
|
|
|
|
mean value: 0.012489104270935058
|
|
|
|
key: test_mcc
|
|
value: [0.56360186 0.32025631 0.65915306 0.625 0.40305948 0.65821474
|
|
0.34495882 0.77800241 0.48255984 0.47783651]
|
|
|
|
mean value: 0.5312643032230694
|
|
|
|
key: train_mcc
|
|
value: [0.76440851 0.5666306 0.71428268 0.74152034 0.54722946 0.72499438
|
|
0.43617489 0.77291774 0.67680962 0.78291863]
|
|
|
|
mean value: 0.6727886849719236
|
|
|
|
key: test_accuracy
|
|
value: [0.78125 0.625 0.828125 0.8125 0.68253968 0.82539683
|
|
0.63492063 0.88888889 0.73015873 0.73015873]
|
|
|
|
mean value: 0.7538938492063492
|
|
|
|
key: train_accuracy
|
|
value: [0.87719298 0.75263158 0.84385965 0.87017544 0.73204904 0.85639229
|
|
0.66024518 0.88616462 0.83012259 0.89141856]
|
|
|
|
mean value: 0.8200251943343473
|
|
|
|
key: test_fscore
|
|
value: [0.78787879 0.45454545 0.8358209 0.8125 0.58333333 0.80701754
|
|
0.71604938 0.89230769 0.69090909 0.76712329]
|
|
|
|
mean value: 0.7347485468743679
|
|
|
|
key: train_fscore
|
|
value: [0.88636364 0.68027211 0.86244204 0.86642599 0.63657957 0.84230769
|
|
0.74673629 0.88812392 0.8086785 0.89198606]
|
|
|
|
mean value: 0.810991582332734
|
|
|
|
key: test_precision
|
|
value: [0.76470588 0.83333333 0.8 0.8125 0.82352941 0.88461538
|
|
0.58 0.87878788 0.82608696 0.68292683]
|
|
|
|
mean value: 0.7886485676644276
|
|
|
|
key: train_precision
|
|
value: [0.82477341 0.96153846 0.77071823 0.89219331 0.99259259 0.93589744
|
|
0.59583333 0.87162162 0.92342342 0.88581315]
|
|
|
|
mean value: 0.8654404971687462
|
|
|
|
key: test_recall
|
|
value: [0.8125 0.3125 0.875 0.8125 0.4516129 0.74193548
|
|
0.93548387 0.90625 0.59375 0.875 ]
|
|
|
|
mean value: 0.7316532258064516
|
|
|
|
key: train_recall
|
|
value: [0.95789474 0.52631579 0.97894737 0.84210526 0.46853147 0.76573427
|
|
1. 0.90526316 0.71929825 0.89824561]
|
|
|
|
mean value: 0.8062335909704331
|
|
|
|
key: test_roc_auc
|
|
value: [0.78125 0.625 0.828125 0.8125 0.67893145 0.82409274
|
|
0.63961694 0.88860887 0.73235887 0.72782258]
|
|
|
|
mean value: 0.7538306451612904
|
|
|
|
key: train_roc_auc
|
|
value: [0.87719298 0.75263158 0.84385965 0.87017544 0.73251135 0.85655134
|
|
0.65964912 0.88619801 0.82992884 0.8914305 ]
|
|
|
|
mean value: 0.8200128818549871
|
|
|
|
key: test_jcc
|
|
value: [0.65 0.29411765 0.71794872 0.68421053 0.41176471 0.67647059
|
|
0.55769231 0.80555556 0.52777778 0.62222222]
|
|
|
|
mean value: 0.5947760048688842
|
|
|
|
key: train_jcc
|
|
value: [0.79591837 0.51546392 0.75815217 0.76433121 0.46689895 0.72757475
|
|
0.59583333 0.79876161 0.67880795 0.80503145]
|
|
|
|
mean value: 0.6906773711312438
|
|
|
|
MCC on Blind test: 0.48
|
|
|
|
Accuracy on Blind test: 0.74
|
|
|
|
Model_name: AdaBoost Classifier
|
|
Model func: AdaBoostClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', AdaBoostClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.21017671 0.19325519 0.19400144 0.19451475 0.19573307 0.1954298
|
|
0.19602394 0.19409275 0.19402218 0.19568753]
|
|
|
|
mean value: 0.19629373550415039
|
|
|
|
key: score_time
|
|
value: [0.01554799 0.01554918 0.01552463 0.01568484 0.01562786 0.01594067
|
|
0.01572442 0.01567841 0.01569891 0.0156548 ]
|
|
|
|
mean value: 0.01566317081451416
|
|
|
|
key: test_mcc
|
|
value: [0.91025899 0.8125 0.75 0.78163175 0.74772995 0.84530217
|
|
0.82507166 0.87462485 0.79701677 0.81572458]
|
|
|
|
mean value: 0.8159860715975866
|
|
|
|
key: train_mcc
|
|
value: [0.91599249 0.95453287 0.95453287 0.9579891 0.9439578 0.95154401
|
|
0.94404909 0.94416837 0.95806341 0.95105762]
|
|
|
|
mean value: 0.9475887624403511
|
|
|
|
key: test_accuracy
|
|
value: [0.953125 0.90625 0.875 0.890625 0.87301587 0.92063492
|
|
0.9047619 0.93650794 0.88888889 0.9047619 ]
|
|
|
|
mean value: 0.9053571428571429
|
|
|
|
key: train_accuracy
|
|
value: [0.95789474 0.97719298 0.97719298 0.97894737 0.97197898 0.97548161
|
|
0.97197898 0.97197898 0.97898424 0.97548161]
|
|
|
|
mean value: 0.9737112483485422
|
|
|
|
key: test_fscore
|
|
value: [0.95522388 0.90625 0.875 0.89230769 0.875 0.92307692
|
|
0.91176471 0.93939394 0.90140845 0.91176471]
|
|
|
|
mean value: 0.9091190297844501
|
|
|
|
key: train_fscore
|
|
value: [0.95833333 0.9773913 0.9773913 0.97909408 0.97202797 0.97594502
|
|
0.97222222 0.97222222 0.97909408 0.97560976]
|
|
|
|
mean value: 0.9739331285091197
|
|
|
|
key: test_precision
|
|
value: [0.91428571 0.90625 0.875 0.87878788 0.84848485 0.88235294
|
|
0.83783784 0.91176471 0.82051282 0.86111111]
|
|
|
|
mean value: 0.8736387858079034
|
|
|
|
key: train_precision
|
|
value: [0.94845361 0.96896552 0.96896552 0.97231834 0.97202797 0.95945946
|
|
0.96551724 0.96219931 0.97231834 0.96885813]
|
|
|
|
mean value: 0.9659083438000281
|
|
|
|
key: test_recall
|
|
value: [1. 0.90625 0.875 0.90625 0.90322581 0.96774194
|
|
1. 0.96875 1. 0.96875 ]
|
|
|
|
mean value: 0.9495967741935484
|
|
|
|
key: train_recall
|
|
value: [0.96842105 0.98596491 0.98596491 0.98596491 0.97202797 0.99300699
|
|
0.97902098 0.98245614 0.98596491 0.98245614]
|
|
|
|
mean value: 0.9821248926512084
|
|
|
|
key: test_roc_auc
|
|
value: [0.953125 0.90625 0.875 0.890625 0.8734879 0.92137097
|
|
0.90625 0.9359879 0.88709677 0.90372984]
|
|
|
|
mean value: 0.9052923387096774
|
|
|
|
key: train_roc_auc
|
|
value: [0.95789474 0.97719298 0.97719298 0.97894737 0.9719789 0.97545086
|
|
0.97196663 0.9719973 0.97899644 0.9754938 ]
|
|
|
|
mean value: 0.9737112010796222
|
|
|
|
key: test_jcc
|
|
value: [0.91428571 0.82857143 0.77777778 0.80555556 0.77777778 0.85714286
|
|
0.83783784 0.88571429 0.82051282 0.83783784]
|
|
|
|
mean value: 0.8343013893013893
|
|
|
|
key: train_jcc
|
|
value: [0.92 0.95578231 0.95578231 0.95904437 0.94557823 0.95302013
|
|
0.94594595 0.94594595 0.95904437 0.95238095]
|
|
|
|
mean value: 0.9492524572845255
|
|
|
|
MCC on Blind test: 0.67
|
|
|
|
Accuracy on Blind test: 0.84
|
|
|
|
Model_name: Bagging Classifier
|
|
Model func: BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
BaggingClassifier(n_jobs=10, oob_score=True,
|
|
random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.09946442 0.09689593 0.09891295 0.12208009 0.12569952 0.11471534
|
|
0.09828115 0.1115005 0.09501028 0.12196279]
|
|
|
|
mean value: 0.10845229625701905
|
|
|
|
key: score_time
|
|
value: [0.02143168 0.035707 0.0302124 0.03004122 0.03268075 0.02089787
|
|
0.04332209 0.03567576 0.04053068 0.04182529]
|
|
|
|
mean value: 0.033232474327087404
|
|
|
|
key: test_mcc
|
|
value: [0.87671401 0.84416229 0.78163175 0.78163175 0.78160117 0.78822824
|
|
0.78160117 0.78160117 0.87462485 0.68352185]
|
|
|
|
mean value: 0.79753182497422
|
|
|
|
key: train_mcc
|
|
value: [0.99649736 0.97897147 0.98246219 0.98598919 0.99299472 0.98254074
|
|
0.98601347 0.98949822 0.98601347 0.98598945]
|
|
|
|
mean value: 0.9866970290331141
|
|
|
|
key: test_accuracy
|
|
value: [0.9375 0.921875 0.890625 0.890625 0.88888889 0.88888889
|
|
0.88888889 0.88888889 0.93650794 0.84126984]
|
|
|
|
mean value: 0.8973958333333333
|
|
|
|
key: train_accuracy
|
|
value: [0.99824561 0.98947368 0.99122807 0.99298246 0.99649737 0.99124343
|
|
0.99299475 0.99474606 0.99299475 0.99299475]
|
|
|
|
mean value: 0.9933400927888899
|
|
|
|
key: test_fscore
|
|
value: [0.93939394 0.92307692 0.89230769 0.88888889 0.89230769 0.89552239
|
|
0.89230769 0.8852459 0.93939394 0.84848485]
|
|
|
|
mean value: 0.8996929905860662
|
|
|
|
key: train_fscore
|
|
value: [0.99824869 0.98951049 0.99124343 0.99295775 0.9965035 0.99130435
|
|
0.99303136 0.99474606 0.99295775 0.99298246]
|
|
|
|
mean value: 0.9933485820457163
|
|
|
|
key: test_precision
|
|
value: [0.91176471 0.90909091 0.87878788 0.90322581 0.85294118 0.83333333
|
|
0.85294118 0.93103448 0.91176471 0.82352941]
|
|
|
|
mean value: 0.8808413586892943
|
|
|
|
key: train_precision
|
|
value: [0.9965035 0.98606272 0.98951049 0.99646643 0.9965035 0.98615917
|
|
0.98958333 0.99300699 0.99646643 0.99298246]
|
|
|
|
mean value: 0.992324501450918
|
|
|
|
key: test_recall
|
|
value: [0.96875 0.9375 0.90625 0.875 0.93548387 0.96774194
|
|
0.93548387 0.84375 0.96875 0.875 ]
|
|
|
|
mean value: 0.9213709677419355
|
|
|
|
key: train_recall
|
|
value: [1. 0.99298246 0.99298246 0.98947368 0.9965035 0.9965035
|
|
0.9965035 0.99649123 0.98947368 0.99298246]
|
|
|
|
mean value: 0.994389645442277
|
|
|
|
key: test_roc_auc
|
|
value: [0.9375 0.921875 0.890625 0.890625 0.88961694 0.89012097
|
|
0.88961694 0.88961694 0.9359879 0.84072581]
|
|
|
|
mean value: 0.8976310483870967
|
|
|
|
key: train_roc_auc
|
|
value: [0.99824561 0.98947368 0.99122807 0.99298246 0.99649736 0.9912342
|
|
0.99298859 0.99474911 0.99298859 0.99299472]
|
|
|
|
mean value: 0.9933382407066618
|
|
|
|
key: test_jcc
|
|
value: [0.88571429 0.85714286 0.80555556 0.8 0.80555556 0.81081081
|
|
0.80555556 0.79411765 0.88571429 0.73684211]
|
|
|
|
mean value: 0.8187008658370888
|
|
|
|
key: train_jcc
|
|
value: [0.9965035 0.97923875 0.98263889 0.98601399 0.99303136 0.98275862
|
|
0.98615917 0.98954704 0.98601399 0.98606272]
|
|
|
|
mean value: 0.9867968016968023
|
|
|
|
MCC on Blind test: 0.67
|
|
|
|
Accuracy on Blind test: 0.84
|
|
|
|
Model_name: Gaussian Process
|
|
Model func: GaussianProcessClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianProcessClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.21038747 0.17521906 0.21723461 0.22550559 0.21980762 0.23454189
|
|
0.23472786 0.23469305 0.25624919 0.23777986]
|
|
|
|
mean value: 0.22461462020874023
|
|
|
|
key: score_time
|
|
value: [0.02699494 0.01949954 0.02698517 0.02703357 0.02720189 0.0272665
|
|
0.0273447 0.02730489 0.03539729 0.02732086]
|
|
|
|
mean value: 0.027234935760498048
|
|
|
|
key: test_mcc
|
|
value: [0.6875 0.38729833 0.50395263 0.63628476 0.64134943 0.47384924
|
|
0.61895161 0.52371369 0.42871785 0.43470518]
|
|
|
|
mean value: 0.5336322729777344
|
|
|
|
key: train_mcc
|
|
value: [0.96512618 0.95848494 0.96169363 0.95827234 0.965351 0.965351
|
|
0.97235938 0.95834669 0.96161964 0.96862577]
|
|
|
|
mean value: 0.963523056967868
|
|
|
|
key: test_accuracy
|
|
value: [0.84375 0.6875 0.75 0.8125 0.80952381 0.73015873
|
|
0.80952381 0.76190476 0.71428571 0.71428571]
|
|
|
|
mean value: 0.763343253968254
|
|
|
|
key: train_accuracy
|
|
value: [0.98245614 0.97894737 0.98070175 0.97894737 0.98248687 0.98248687
|
|
0.98598949 0.97898424 0.98073555 0.98423818]
|
|
|
|
mean value: 0.9815973822472117
|
|
|
|
key: test_fscore
|
|
value: [0.84375 0.72222222 0.76470588 0.82857143 0.82857143 0.75362319
|
|
0.80645161 0.76923077 0.72727273 0.74285714]
|
|
|
|
mean value: 0.7787256402387683
|
|
|
|
key: train_fscore
|
|
value: [0.98263889 0.97931034 0.98093588 0.97923875 0.98275862 0.98275862
|
|
0.9862069 0.97923875 0.98086957 0.98434783]
|
|
|
|
mean value: 0.9818304146819015
|
|
|
|
key: test_precision
|
|
value: [0.84375 0.65 0.72222222 0.76315789 0.74358974 0.68421053
|
|
0.80645161 0.75757576 0.70588235 0.68421053]
|
|
|
|
mean value: 0.7361050636600547
|
|
|
|
key: train_precision
|
|
value: [0.97250859 0.96271186 0.96917808 0.96587031 0.96938776 0.96938776
|
|
0.97278912 0.96587031 0.97241379 0.97586207]
|
|
|
|
mean value: 0.9695979639917629
|
|
|
|
key: test_recall
|
|
value: [0.84375 0.8125 0.8125 0.90625 0.93548387 0.83870968
|
|
0.80645161 0.78125 0.75 0.8125 ]
|
|
|
|
mean value: 0.8299395161290323
|
|
|
|
key: train_recall
|
|
value: [0.99298246 0.99649123 0.99298246 0.99298246 0.9965035 0.9965035
|
|
1. 0.99298246 0.98947368 0.99298246]
|
|
|
|
mean value: 0.9943884185989449
|
|
|
|
key: test_roc_auc
|
|
value: [0.84375 0.6875 0.75 0.8125 0.81149194 0.73185484
|
|
0.80947581 0.76159274 0.71370968 0.71270161]
|
|
|
|
mean value: 0.7634576612903226
|
|
|
|
key: train_roc_auc
|
|
value: [0.98245614 0.97894737 0.98070175 0.97894737 0.98246227 0.98246227
|
|
0.98596491 0.97900871 0.98075083 0.98425347]
|
|
|
|
mean value: 0.9815955097534045
|
|
|
|
key: test_jcc
|
|
value: [0.72972973 0.56521739 0.61904762 0.70731707 0.70731707 0.60465116
|
|
0.67567568 0.625 0.57142857 0.59090909]
|
|
|
|
mean value: 0.6396293387227195
|
|
|
|
key: train_jcc
|
|
value: [0.96587031 0.95945946 0.96258503 0.95932203 0.96610169 0.96610169
|
|
0.97278912 0.95932203 0.96245734 0.96917808]
|
|
|
|
mean value: 0.9643186793989418
|
|
|
|
MCC on Blind test: 0.35
|
|
|
|
Accuracy on Blind test: 0.68
|
|
|
|
Model_name: Gradient Boosting
|
|
Model func: GradientBoostingClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GradientBoostingClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.85160112 0.84095502 0.84922051 0.83509898 0.83196425 0.82911921
|
|
0.82614064 0.82878995 0.82055306 0.81654668]
|
|
|
|
mean value: 0.8329989433288574
|
|
|
|
key: score_time
|
|
value: [0.01016712 0.01039219 0.01057267 0.01019001 0.00998306 0.00996137
|
|
0.00951242 0.00953674 0.00991821 0.01015687]
|
|
|
|
mean value: 0.010039067268371582
|
|
|
|
key: test_mcc
|
|
value: [0.93933644 0.875 0.81409158 0.875 0.85238636 0.84530217
|
|
0.84530217 0.93832585 0.84484323 0.68245968]
|
|
|
|
mean value: 0.8512047470887891
|
|
|
|
key: train_mcc
|
|
value: [0.99300691 0.99298246 0.99649736 0.99649736 1. 0.98949822
|
|
0.98949822 0.99301901 0.99299472 0.98598945]
|
|
|
|
mean value: 0.9929983708938576
|
|
|
|
key: test_accuracy
|
|
value: [0.96875 0.9375 0.90625 0.9375 0.92063492 0.92063492
|
|
0.92063492 0.96825397 0.92063492 0.84126984]
|
|
|
|
mean value: 0.9242063492063491
|
|
|
|
key: train_accuracy
|
|
value: [0.99649123 0.99649123 0.99824561 0.99824561 1. 0.99474606
|
|
0.99474606 0.99649737 0.99649737 0.99299475]
|
|
|
|
mean value: 0.9964955295418932
|
|
|
|
key: test_fscore
|
|
value: [0.96969697 0.9375 0.90322581 0.9375 0.92537313 0.92307692
|
|
0.92307692 0.96969697 0.92537313 0.84375 ]
|
|
|
|
mean value: 0.9258269860656114
|
|
|
|
key: train_fscore
|
|
value: [0.99647887 0.99649123 0.99824253 0.99824869 1. 0.99474606
|
|
0.99474606 0.99647887 0.99649123 0.99298246]
|
|
|
|
mean value: 0.996490599511949
|
|
|
|
key: test_precision
|
|
value: [0.94117647 0.9375 0.93333333 0.9375 0.86111111 0.88235294
|
|
0.88235294 0.94117647 0.88571429 0.84375 ]
|
|
|
|
mean value: 0.9045967553688142
|
|
|
|
key: train_precision
|
|
value: [1. 0.99649123 1. 0.9965035 1. 0.99649123
|
|
0.99649123 1. 0.99649123 0.99298246]
|
|
|
|
mean value: 0.9975450864924549
|
|
|
|
key: test_recall
|
|
value: [1. 0.9375 0.875 0.9375 1. 0.96774194
|
|
0.96774194 1. 0.96875 0.84375 ]
|
|
|
|
mean value: 0.9497983870967742
|
|
|
|
key: train_recall
|
|
value: [0.99298246 0.99649123 0.99649123 1. 1. 0.99300699
|
|
0.99300699 0.99298246 0.99649123 0.99298246]
|
|
|
|
mean value: 0.9954435038645565
|
|
|
|
key: test_roc_auc
|
|
value: [0.96875 0.9375 0.90625 0.9375 0.921875 0.92137097
|
|
0.92137097 0.96774194 0.91985887 0.84122984]
|
|
|
|
mean value: 0.9243447580645161
|
|
|
|
key: train_roc_auc
|
|
value: [0.99649123 0.99649123 0.99824561 0.99824561 1. 0.99474911
|
|
0.99474911 0.99649123 0.99649736 0.99299472]
|
|
|
|
mean value: 0.9964955220218378
|
|
|
|
key: test_jcc
|
|
value: [0.94117647 0.88235294 0.82352941 0.88235294 0.86111111 0.85714286
|
|
0.85714286 0.94117647 0.86111111 0.72972973]
|
|
|
|
mean value: 0.8636825901531784
|
|
|
|
key: train_jcc
|
|
value: [0.99298246 0.99300699 0.99649123 0.9965035 1. 0.98954704
|
|
0.98954704 0.99298246 0.99300699 0.98606272]
|
|
|
|
mean value: 0.9930130417293447
|
|
|
|
MCC on Blind test: 0.7
|
|
|
|
Accuracy on Blind test: 0.86
|
|
|
|
Model_name: QDA
|
|
Model func: QuadraticDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', QuadraticDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.03198886 0.03319812 0.03335285 0.03351092 0.03357315 0.03336334
|
|
0.03523016 0.03520679 0.03442335 0.06159759]
|
|
|
|
mean value: 0.036544513702392575
|
|
|
|
key: score_time
|
|
value: [0.01272321 0.01282072 0.01404047 0.02016497 0.01373696 0.01378155
|
|
0.0173955 0.01282907 0.01376104 0.01932311]
|
|
|
|
mean value: 0.015057659149169922
|
|
|
|
key: test_mcc
|
|
value: [0.21442251 0.18442778 0.43033148 0.35043832 0.22008521 0.12607181
|
|
0.21117195 0.36114822 0.3592106 0.18084933]
|
|
|
|
mean value: 0.2638157215951625
|
|
|
|
key: train_mcc
|
|
value: [0.3365728 0.35876576 0.31683766 0.32679675 0.34005692 0.34644988
|
|
0.33683398 0.33242623 0.32917725 0.34206181]
|
|
|
|
mean value: 0.33659790354910774
|
|
|
|
key: test_accuracy
|
|
value: [0.5625 0.578125 0.65625 0.609375 0.53968254 0.52380952
|
|
0.55555556 0.65079365 0.61904762 0.55555556]
|
|
|
|
mean value: 0.5850694444444444
|
|
|
|
key: train_accuracy
|
|
value: [0.60175439 0.61403509 0.59122807 0.59649123 0.60420315 0.60770578
|
|
0.60245184 0.59894921 0.5971979 0.60420315]
|
|
|
|
mean value: 0.6018219805204781
|
|
|
|
key: test_fscore
|
|
value: [0.68888889 0.66666667 0.74418605 0.71910112 0.68131868 0.66666667
|
|
0.68181818 0.73170732 0.72727273 0.68888889]
|
|
|
|
mean value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
0.6996515188701006
|
|
|
|
key: train_fscore
|
|
value: [0.71518193 0.72151899 0.70983811 0.7125 0.71679198 0.71859296
|
|
0.71589487 0.71339174 0.7125 0.7160804 ]
|
|
|
|
mean value: 0.7152290981730446
|
|
|
|
key: test_precision
|
|
value: [0.53448276 0.55102041 0.59259259 0.56140351 0.51666667 0.50847458
|
|
0.52631579 0.6 0.57142857 0.53448276]
|
|
|
|
mean value: 0.5496867630609276
|
|
|
|
key: train_precision
|
|
value: [0.55664062 0.56435644 0.55019305 0.55339806 0.55859375 0.56078431
|
|
0.55750487 0.55447471 0.55339806 0.55772994]
|
|
|
|
mean value: 0.5567073813824097
|
|
|
|
key: test_recall
|
|
value: [0.96875 0.84375 1. 1. 1. 0.96774194
|
|
0.96774194 0.9375 1. 0.96875 ]
|
|
|
|
mean value: 0.9654233870967742
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.5625 0.578125 0.65625 0.609375 0.546875 0.53074597
|
|
0.56199597 0.64616935 0.61290323 0.54889113]
|
|
|
|
mean value: 0.5853830645161291
|
|
|
|
key: train_roc_auc
|
|
value: [0.60175439 0.61403509 0.59122807 0.59649123 0.60350877 0.60701754
|
|
0.60175439 0.59965035 0.5979021 0.6048951 ]
|
|
|
|
mean value: 0.6018237026131763
|
|
|
|
key: test_jcc
|
|
value: [0.52542373 0.5 0.59259259 0.56140351 0.51666667 0.5
|
|
0.51724138 0.57692308 0.57142857 0.52542373]
|
|
|
|
mean value: 0.5387103253320301
|
|
|
|
key: train_jcc
|
|
value: [0.55664062 0.56435644 0.55019305 0.55339806 0.55859375 0.56078431
|
|
0.55750487 0.55447471 0.55339806 0.55772994]
|
|
|
|
mean value: 0.5567073813824097
|
|
|
|
MCC on Blind test: 0.04
|
|
|
|
Accuracy on Blind test: 0.46
|
|
|
|
Model_name: Ridge Classifier
|
|
Model func: RidgeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.03761649 0.04023623 0.04000401 0.04022217 0.04029584 0.04453206
|
|
0.04298353 0.04024267 0.04377055 0.03598547]
|
|
|
|
mean value: 0.040588903427124026
|
|
|
|
key: score_time
|
|
value: [0.02147245 0.01967001 0.01933289 0.01929116 0.01915932 0.01928139
|
|
0.0192802 0.01923466 0.01931143 0.01930022]
|
|
|
|
mean value: 0.019533371925354003
|
|
|
|
key: test_mcc
|
|
value: [0.56360186 0.56360186 0.56360186 0.72192954 0.73343622 0.71790017
|
|
0.56086231 0.56710881 0.66625621 0.6385282 ]
|
|
|
|
mean value: 0.629682705082644
|
|
|
|
key: train_mcc
|
|
value: [0.78678385 0.81180663 0.77666348 0.7840214 0.78933933 0.78351298
|
|
0.76948901 0.76372565 0.76304297 0.80585562]
|
|
|
|
mean value: 0.7834240912338475
|
|
|
|
key: test_accuracy
|
|
value: [0.78125 0.78125 0.78125 0.859375 0.85714286 0.85714286
|
|
0.77777778 0.77777778 0.82539683 0.80952381]
|
|
|
|
mean value: 0.8107886904761905
|
|
|
|
key: train_accuracy
|
|
value: [0.89298246 0.90526316 0.8877193 0.89122807 0.89316988 0.89141856
|
|
0.88441331 0.88091068 0.88091068 0.90192644]
|
|
|
|
mean value: 0.8909942544627769
|
|
|
|
key: test_fscore
|
|
value: [0.77419355 0.78787879 0.77419355 0.86567164 0.86956522 0.86153846
|
|
0.78787879 0.75862069 0.84507042 0.83333333]
|
|
|
|
mean value: 0.8157944438776297
|
|
|
|
key: train_fscore
|
|
value: [0.89536878 0.90784983 0.89078498 0.89455782 0.89782245 0.89383562
|
|
0.8869863 0.88474576 0.88395904 0.90508475]
|
|
|
|
mean value: 0.8940995333789712
|
|
|
|
key: test_precision
|
|
value: [0.8 0.76470588 0.8 0.82857143 0.78947368 0.82352941
|
|
0.74285714 0.84615385 0.76923077 0.75 ]
|
|
|
|
mean value: 0.791452216514136
|
|
|
|
key: train_precision
|
|
value: [0.87583893 0.88372093 0.86710963 0.8679868 0.86173633 0.87583893
|
|
0.86912752 0.8557377 0.86046512 0.87540984]
|
|
|
|
mean value: 0.8692971724259259
|
|
|
|
key: test_recall
|
|
value: [0.75 0.8125 0.75 0.90625 0.96774194 0.90322581
|
|
0.83870968 0.6875 0.9375 0.9375 ]
|
|
|
|
mean value: 0.8490927419354839
|
|
|
|
key: train_recall
|
|
value: [0.91578947 0.93333333 0.91578947 0.92280702 0.93706294 0.91258741
|
|
0.90559441 0.91578947 0.90877193 0.93684211]
|
|
|
|
mean value: 0.9204367562262299
|
|
|
|
key: test_roc_auc
|
|
value: [0.78125 0.78125 0.78125 0.859375 0.85887097 0.8578629
|
|
0.77872984 0.77923387 0.82358871 0.80745968]
|
|
|
|
mean value: 0.8108870967741936
|
|
|
|
key: train_roc_auc
|
|
value: [0.89298246 0.90526316 0.8877193 0.89122807 0.89309287 0.89138143
|
|
0.88437615 0.88097166 0.88095939 0.90198749]
|
|
|
|
mean value: 0.8909961967856705
|
|
|
|
key: test_jcc
|
|
value: [0.63157895 0.65 0.63157895 0.76315789 0.76923077 0.75675676
|
|
0.65 0.61111111 0.73170732 0.71428571]
|
|
|
|
mean value: 0.6909407457931207
|
|
|
|
key: train_jcc
|
|
value: [0.81055901 0.83125 0.80307692 0.80923077 0.81458967 0.80804954
|
|
0.79692308 0.79331307 0.79204893 0.82662539]
|
|
|
|
mean value: 0.8085666363268487
|
|
|
|
MCC on Blind test: 0.45
|
|
|
|
Accuracy on Blind test: 0.73
|
|
|
|
Model_name: Ridge ClassifierCV
|
|
Model func: RidgeClassifierCV(cv=10)
|
|
List of models: /home/tanu/git/LSHTM_analysis/scripts/ml/./katg_cd_7030.py:196: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
rouC_CT.sort_values(by = ['test_mcc'], ascending = False, inplace = True)
|
|
/home/tanu/git/LSHTM_analysis/scripts/ml/./katg_cd_7030.py:199: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
rouC_BT.sort_values(by = ['bts_mcc'], ascending = False, inplace = True)
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifierCV(cv=10))])
|
|
|
|
key: fit_time
|
|
value: [0.2927134 0.30341053 0.30166936 0.41127467 0.32519579 0.31081867
|
|
0.3082726 0.35637784 0.29826283 0.31384945]
|
|
|
|
mean value: 0.32218451499938966
|
|
|
|
key: score_time
|
|
value: [0.01918674 0.01912594 0.01932597 0.0190556 0.01911068 0.01906705
|
|
0.01924586 0.0189929 0.0189693 0.01898503]
|
|
|
|
mean value: 0.019106507301330566
|
|
|
|
key: test_mcc
|
|
value: [0.59404013 0.56360186 0.56360186 0.72192954 0.73343622 0.63159952
|
|
0.56086231 0.62475802 0.71705182 0.66625621]
|
|
|
|
mean value: 0.6377137491806849
|
|
|
|
key: train_mcc
|
|
value: [0.80761932 0.81180663 0.77666348 0.7840214 0.78933933 0.82176407
|
|
0.80817796 0.79970671 0.79081099 0.82735112]
|
|
|
|
mean value: 0.8017261012819527
|
|
|
|
key: test_accuracy
|
|
value: [0.796875 0.78125 0.78125 0.859375 0.85714286 0.80952381
|
|
0.77777778 0.80952381 0.85714286 0.82539683]
|
|
|
|
mean value: 0.8155257936507936
|
|
|
|
key: train_accuracy
|
|
value: [0.90350877 0.90526316 0.8877193 0.89122807 0.89316988 0.91068301
|
|
0.90367776 0.89842382 0.89492119 0.91243433]
|
|
|
|
mean value: 0.900102928073248
|
|
|
|
key: test_fscore
|
|
value: [0.79365079 0.78787879 0.77419355 0.86567164 0.86956522 0.82352941
|
|
0.78787879 0.8 0.86567164 0.84507042]
|
|
|
|
mean value: 0.8213110253068777
|
|
|
|
key: train_fscore
|
|
value: [0.90533563 0.90784983 0.89078498 0.89455782 0.89782245 0.91222031
|
|
0.90598291 0.9023569 0.89726027 0.91554054]
|
|
|
|
mean value: 0.9029711641867898
|
|
|
|
key: test_precision
|
|
value: [0.80645161 0.76470588 0.8 0.82857143 0.78947368 0.75675676
|
|
0.74285714 0.85714286 0.82857143 0.76923077]
|
|
|
|
mean value: 0.7943761562597077
|
|
|
|
key: train_precision
|
|
value: [0.88851351 0.88372093 0.86710963 0.8679868 0.86173633 0.89830508
|
|
0.88628763 0.86731392 0.87625418 0.88273616]
|
|
|
|
mean value: 0.8779964174357806
|
|
|
|
key: test_recall
|
|
value: [0.78125 0.8125 0.75 0.90625 0.96774194 0.90322581
|
|
0.83870968 0.75 0.90625 0.9375 ]
|
|
|
|
mean value: 0.8553427419354839
|
|
|
|
key: train_recall
|
|
value: [0.92280702 0.93333333 0.91578947 0.92280702 0.93706294 0.92657343
|
|
0.92657343 0.94035088 0.91929825 0.95087719]
|
|
|
|
mean value: 0.9295472948104527
|
|
|
|
key: test_roc_auc
|
|
value: [0.796875 0.78125 0.78125 0.859375 0.85887097 0.8109879
|
|
0.77872984 0.81048387 0.85635081 0.82358871]
|
|
|
|
mean value: 0.8157762096774194
|
|
|
|
key: train_roc_auc
|
|
value: [0.90350877 0.90526316 0.8877193 0.89122807 0.89309287 0.91065513
|
|
0.90363759 0.89849712 0.89496381 0.91250153]
|
|
|
|
mean value: 0.9001067353698933
|
|
|
|
key: test_jcc
|
|
value: [0.65789474 0.65 0.63157895 0.76315789 0.76923077 0.7
|
|
0.65 0.66666667 0.76315789 0.73170732]
|
|
|
|
mean value: 0.6983394226654818
|
|
|
|
key: train_jcc
|
|
value: [0.82704403 0.83125 0.80307692 0.80923077 0.81458967 0.83860759
|
|
0.828125 0.82208589 0.8136646 0.84423676]
|
|
|
|
mean value: 0.8231911224023584
|
|
|
|
MCC on Blind test: 0.45
|
|
|
|
Accuracy on Blind test: 0.73
|