19742 lines
974 KiB
Text
19742 lines
974 KiB
Text
/home/tanu/git/LSHTM_analysis/scripts/ml/ml_data_orig.py:550: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
mask_check.sort_values(by = ['ligand_distance'], ascending = True, inplace = True)
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/xgboost/compat.py:36: FutureWarning: pandas.Int64Index is deprecated and will be removed from pandas in a future version. Use pandas.Index with the appropriate dtype instead.
|
|
from pandas import MultiIndex, Int64Index
|
|
1.22.4
|
|
1.4.1
|
|
|
|
aaindex_df contains non-numerical data
|
|
|
|
Total no. of non-numerial columns: 2
|
|
|
|
Selecting numerical data only
|
|
|
|
PASS: successfully selected numerical columns only for aaindex_df
|
|
|
|
Now checking for NA in the remaining aaindex_cols
|
|
|
|
Counting aaindex_df cols with NA
|
|
ncols with NA: 4 columns
|
|
Dropping these...
|
|
Original ncols: 127
|
|
|
|
Revised df ncols: 123
|
|
|
|
Checking NA in revised df...
|
|
|
|
PASS: cols with NA successfully dropped from aaindex_df
|
|
Proceeding with combining aa_df with other features_df
|
|
|
|
PASS: ncols match
|
|
Expected ncols: 123
|
|
Got: 123
|
|
|
|
Total no. of columns in clean aa_df: 123
|
|
|
|
Proceeding to merge, expected nrows in merged_df: 817
|
|
|
|
PASS: my_features_df and aa_df successfully combined
|
|
nrows: 817
|
|
ncols: 269
|
|
count of NULL values before imputation
|
|
|
|
or_mychisq 244
|
|
log10_or_mychisq 244
|
|
dtype: int64
|
|
count of NULL values AFTER imputation
|
|
|
|
mutationinformation 0
|
|
or_rawI 0
|
|
logorI 0
|
|
dtype: int64
|
|
|
|
PASS: OR values imputed, data ready for ML
|
|
|
|
Total no. of features for aaindex: 123
|
|
|
|
No. of numerical features: 168
|
|
No. of categorical features: 7
|
|
|
|
index: 0
|
|
ind: 1
|
|
|
|
Mask count check: True
|
|
|
|
index: 1
|
|
ind: 2
|
|
|
|
Mask count check: True
|
|
Original Data
|
|
Counter({1: 309, 0: 158}) Data dim: (467, 175)
|
|
|
|
-------------------------------------------------------------
|
|
Successfully split data: ORIGINAL training
|
|
actual values: training set
|
|
imputed values: blind test set
|
|
Train data size: (467, 175)
|
|
Test data size: (350, 175)
|
|
y_train numbers: Counter({1: 309, 0: 158})
|
|
y_train ratio: 0.511326860841424
|
|
|
|
y_test_numbers: Counter({0: 315, 1: 35})
|
|
y_test ratio: 9.0
|
|
-------------------------------------------------------------
|
|
Simple Random OverSampling
|
|
Counter({1: 309, 0: 309})
|
|
(618, 175)
|
|
Simple Random UnderSampling
|
|
Counter({0: 158, 1: 158})
|
|
(316, 175)
|
|
Simple Combined Over and UnderSampling
|
|
Counter({0: 309, 1: 309})
|
|
(618, 175)
|
|
SMOTE_NC OverSampling
|
|
Counter({1: 309, 0: 309})
|
|
(618, 175)
|
|
|
|
#####################################################################
|
|
|
|
Running ML analysis: ORIGINAL
|
|
Gene name: katG
|
|
Drug name: isoniazid
|
|
|
|
Output directory: /home/tanu/git/Data/isoniazid/output/ml/tts_orig/
|
|
|
|
Sanity checks:
|
|
Total input features: 175
|
|
|
|
Training data size: (467, 175)
|
|
Test data size: (350, 175)
|
|
|
|
Target feature numbers (training data): Counter({1: 309, 0: 158})
|
|
Target features ratio (training data: 0.511326860841424
|
|
|
|
Target feature numbers (test data): Counter({0: 315, 1: 35})
|
|
Target features ratio (test data): 9.0
|
|
|
|
#####################################################################
|
|
|
|
|
|
================================================================
|
|
|
|
Strucutral features (n): 36
|
|
These are:
|
|
Common stablity features: ['ligand_distance', 'ligand_affinity_change', 'duet_stability_change', 'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'mcsm_ppi2_affinity', 'interface_dist']
|
|
FoldX columns: ['electro_rr', 'electro_mm', 'electro_sm', 'electro_ss', 'disulfide_rr', 'disulfide_mm', 'disulfide_sm', 'disulfide_ss', 'hbonds_rr', 'hbonds_mm', 'hbonds_sm', 'hbonds_ss', 'partcov_rr', 'partcov_mm', 'partcov_sm', 'partcov_ss', 'vdwclashes_rr', 'vdwclashes_mm', 'vdwclashes_sm', 'vdwclashes_ss', 'volumetric_rr', 'volumetric_mm', 'volumetric_ss']
|
|
Other struc columns: ['rsa', 'kd_values', 'rd_values']
|
|
================================================================
|
|
|
|
AAindex features (n): 123
|
|
These are:
|
|
['ALTS910101', 'AZAE970101', 'AZAE970102', 'BASU010101', 'BENS940101', 'BENS940102', 'BENS940103', 'BENS940104', 'BETM990101', 'BLAJ010101', 'BONM030101', 'BONM030102', 'BONM030103', 'BONM030104', 'BONM030105', 'BONM030106', 'BRYS930101', 'CROG050101', 'CSEM940101', 'DAYM780301', 'DAYM780302', 'DOSZ010101', 'DOSZ010102', 'DOSZ010103', 'DOSZ010104', 'FEND850101', 'FITW660101', 'GEOD900101', 'GIAG010101', 'GONG920101', 'GRAR740104', 'HENS920101', 'HENS920102', 'HENS920103', 'HENS920104', 'JOHM930101', 'JOND920103', 'JOND940101', 'KANM000101', 'KAPO950101', 'KESO980101', 'KESO980102', 'KOLA920101', 'KOLA930101', 'KOSJ950100_RSA_SST', 'KOSJ950100_SST', 'KOSJ950110_RSA', 'KOSJ950115', 'LEVJ860101', 'LINK010101', 'LIWA970101', 'LUTR910101', 'LUTR910102', 'LUTR910103', 'LUTR910104', 'LUTR910105', 'LUTR910106', 'LUTR910107', 'LUTR910108', 'LUTR910109', 'MCLA710101', 'MCLA720101', 'MEHP950102', 'MICC010101', 'MIRL960101', 'MIYS850102', 'MIYS850103', 'MIYS930101', 'MIYS960101', 'MIYS960102', 'MIYS960103', 'MIYS990106', 'MIYS990107', 'MIYT790101', 'MOHR870101', 'MOOG990101', 'MUET010101', 'MUET020101', 'MUET020102', 'NAOD960101', 'NGPC000101', 'NIEK910101', 'NIEK910102', 'OGAK980101', 'OVEJ920100_RSA', 'OVEJ920101', 'OVEJ920102', 'OVEJ920103', 'PRLA000101', 'PRLA000102', 'QUIB020101', 'QU_C930101', 'QU_C930102', 'QU_C930103', 'RIER950101', 'RISJ880101', 'RUSR970101', 'RUSR970102', 'RUSR970103', 'SIMK990101', 'SIMK990102', 'SIMK990103', 'SIMK990104', 'SIMK990105', 'SKOJ000101', 'SKOJ000102', 'SKOJ970101', 'TANS760101', 'TANS760102', 'THOP960101', 'TOBD000101', 'TOBD000102', 'TUDE900101', 'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101', 'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106']
|
|
================================================================
|
|
|
|
Evolutionary features (n): 3
|
|
These are:
|
|
['consurf_score', 'snap2_score', 'provean_score']
|
|
================================================================
|
|
|
|
Genomic features (n): 6
|
|
These are:
|
|
['maf', 'logorI']
|
|
['lineage_proportion', 'dist_lineage_proportion', 'lineage_count_all', 'lineage_count_unique']
|
|
================================================================
|
|
|
|
Categorical features (n): 7
|
|
These are:
|
|
['ss_class', 'aa_prop_change', 'electrostatics_change', 'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site']
|
|
================================================================
|
|
|
|
|
|
Pass: No. of features match
|
|
|
|
#####################################################################
|
|
|
|
|
|
Model_name: Logistic Regression
|
|
Model func: LogisticRegression(random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegression(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.04550624 0.03566933 0.03764677 0.03806186 0.03683448 0.03676414
|
|
0.03676653 0.03999543 0.03731942 0.04414487]
|
|
|
|
mean value: 0.03887090682983398
|
|
|
|
key: score_time
|
|
value: [0.01246476 0.01227474 0.01236653 0.01562428 0.01560974 0.01561832
|
|
0.01560354 0.01569724 0.01537657 0.01530743]
|
|
|
|
mean value: 0.014594316482543945
|
|
|
|
key: test_mcc
|
|
value: [0.90662544 0.66402366 0.65994312 0.90662544 0.8084425 0.66337469
|
|
0.72715272 0.75776742 0.54774009 0.75806977]
|
|
|
|
mean value: 0.7399764867716851
|
|
|
|
key: train_mcc
|
|
value: [0.85008968 0.83374086 0.82797794 0.79517432 0.8061574 0.83910661
|
|
0.82837741 0.80706626 0.81804827 0.82292436]
|
|
|
|
mean value: 0.822866310204347
|
|
|
|
key: test_accuracy
|
|
value: [0.95744681 0.85106383 0.85106383 0.95744681 0.91489362 0.85106383
|
|
0.87234043 0.89130435 0.80434783 0.89130435]
|
|
|
|
mean value: 0.88422756706753
|
|
|
|
key: train_accuracy
|
|
value: [0.93333333 0.92619048 0.92380952 0.90952381 0.91428571 0.92857143
|
|
0.92380952 0.91448931 0.9192399 0.9216152 ]
|
|
|
|
mean value: 0.9214868227576066
|
|
|
|
key: test_fscore
|
|
value: [0.96875 0.88888889 0.89230769 0.96875 0.9375 0.89552239
|
|
0.9 0.91803279 0.85714286 0.92063492]
|
|
|
|
mean value: 0.9147529533919306
|
|
|
|
key: train_fscore
|
|
value: [0.95104895 0.94589878 0.94385965 0.93356643 0.93706294 0.94755245
|
|
0.94425087 0.93684211 0.94055944 0.9426087 ]
|
|
|
|
mean value: 0.9423250309268
|
|
|
|
key: test_precision
|
|
value: [0.93939394 0.875 0.85294118 0.93939394 0.90909091 0.83333333
|
|
0.93103448 0.93333333 0.84375 0.87878788]
|
|
|
|
mean value: 0.8936058992562542
|
|
|
|
key: train_precision
|
|
value: [0.92517007 0.91864407 0.92123288 0.90816327 0.91156463 0.92176871
|
|
0.91554054 0.91438356 0.91496599 0.91554054]
|
|
|
|
mean value: 0.916697424029508
|
|
|
|
key: test_recall
|
|
value: [1. 0.90322581 0.93548387 1. 0.96774194 0.96774194
|
|
0.87096774 0.90322581 0.87096774 0.96666667]
|
|
|
|
mean value: 0.9386021505376344
|
|
|
|
key: train_recall
|
|
value: [0.97841727 0.97482014 0.9676259 0.96043165 0.96402878 0.97482014
|
|
0.97482014 0.96043165 0.9676259 0.97132616]
|
|
|
|
mean value: 0.9694347747608365
|
|
|
|
key: test_roc_auc
|
|
value: [0.9375 0.8266129 0.81149194 0.9375 0.89012097 0.79637097
|
|
0.87298387 0.88494624 0.7688172 0.85833333]
|
|
|
|
mean value: 0.8584677419354839
|
|
|
|
key: train_roc_auc
|
|
value: [0.91174384 0.90290303 0.90282703 0.8851454 0.89046509 0.90642416
|
|
0.8993819 0.89280324 0.89640036 0.89763491]
|
|
|
|
mean value: 0.8985728980669149
|
|
|
|
key: test_jcc
|
|
value: [0.93939394 0.8 0.80555556 0.93939394 0.88235294 0.81081081
|
|
0.81818182 0.84848485 0.75 0.85294118]
|
|
|
|
mean value: 0.8447115029467971
|
|
|
|
key: train_jcc
|
|
value: [0.90666667 0.89735099 0.89368771 0.87540984 0.88157895 0.90033223
|
|
0.89438944 0.88118812 0.88778878 0.89144737]
|
|
|
|
mean value: 0.8909840082087678
|
|
|
|
MCC on Blind test: 0.24
|
|
|
|
Accuracy on Blind test: 0.48
|
|
|
|
Model_name: Logistic RegressionCV
|
|
Model func: LogisticRegressionCV(random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegressionCV(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.99850392 0.90923595 0.88660312 1.0262289 0.91332746 0.87034369
|
|
0.93770385 0.96103406 0.87058878 0.85822821]
|
|
|
|
mean value: 0.9231797933578492
|
|
|
|
key: score_time
|
|
value: [0.01522923 0.01579213 0.01238084 0.01559043 0.01568437 0.01571655
|
|
0.01861119 0.01919341 0.01918507 0.01683927]
|
|
|
|
mean value: 0.016422247886657713
|
|
|
|
key: test_mcc
|
|
value: [0.90524194 0.8566725 0.95436677 0.8566725 0.90662544 0.76032282
|
|
0.81048387 0.85513419 0.79930604 0.72379255]
|
|
|
|
mean value: 0.8428618620895384
|
|
|
|
key: train_mcc
|
|
value: [0.97336948 1. 1. 0.9680267 0.96269263 0.97870346
|
|
0.97336948 0.96817602 0.96817602 1. ]
|
|
|
|
mean value: 0.979251380217927
|
|
|
|
key: test_accuracy
|
|
value: [0.95744681 0.93617021 0.9787234 0.93617021 0.95744681 0.89361702
|
|
0.91489362 0.93478261 0.91304348 0.86956522]
|
|
|
|
mean value: 0.9291859389454209
|
|
|
|
key: train_accuracy
|
|
value: [0.98809524 1. 1. 0.98571429 0.98333333 0.99047619
|
|
0.98809524 0.98574822 0.98574822 1. ]
|
|
|
|
mean value: 0.9907210722768918
|
|
|
|
key: test_fscore
|
|
value: [0.96774194 0.95238095 0.98360656 0.95238095 0.96875 0.92307692
|
|
0.93548387 0.95081967 0.9375 0.89655172]
|
|
|
|
mean value: 0.9468292587936569
|
|
|
|
key: train_fscore
|
|
value: [0.99102334 1. 1. 0.98924731 0.98747764 0.99283154
|
|
0.99102334 0.98924731 0.98924731 1. ]
|
|
|
|
mean value: 0.9930097793978486
|
|
|
|
key: test_precision
|
|
value: [0.96774194 0.9375 1. 0.9375 0.93939394 0.88235294
|
|
0.93548387 0.96666667 0.90909091 0.92857143]
|
|
|
|
mean value: 0.9404301691351027
|
|
|
|
key: train_precision
|
|
value: [0.98924731 1. 1. 0.98571429 0.98220641 0.98928571
|
|
0.98924731 0.98571429 0.98571429 1. ]
|
|
|
|
mean value: 0.9907129600778436
|
|
|
|
key: test_recall
|
|
value: [0.96774194 0.96774194 0.96774194 0.96774194 1. 0.96774194
|
|
0.93548387 0.93548387 0.96774194 0.86666667]
|
|
|
|
mean value: 0.9544086021505377
|
|
|
|
key: train_recall
|
|
value: [0.99280576 1. 1. 0.99280576 0.99280576 0.99640288
|
|
0.99280576 0.99280576 0.99280576 1. ]
|
|
|
|
mean value: 0.9953237410071942
|
|
|
|
key: test_roc_auc
|
|
value: [0.95262097 0.92137097 0.98387097 0.92137097 0.9375 0.85887097
|
|
0.90524194 0.9344086 0.88387097 0.87083333]
|
|
|
|
mean value: 0.9169959677419355
|
|
|
|
key: train_roc_auc
|
|
value: [0.9858395 1. 1. 0.98231837 0.97879724 0.98763806
|
|
0.9858395 0.98241686 0.98241686 1. ]
|
|
|
|
mean value: 0.9885266395373803
|
|
|
|
key: test_jcc
|
|
value: [0.9375 0.90909091 0.96774194 0.90909091 0.93939394 0.85714286
|
|
0.87878788 0.90625 0.88235294 0.8125 ]
|
|
|
|
mean value: 0.8999851370166835
|
|
|
|
key: train_jcc
|
|
value: [0.98220641 1. 1. 0.9787234 0.97526502 0.98576512
|
|
0.98220641 0.9787234 0.9787234 1. ]
|
|
|
|
mean value: 0.9861613166376862
|
|
|
|
MCC on Blind test: 0.15
|
|
|
|
Accuracy on Blind test: 0.4
|
|
|
|
Model_name: Gaussian NB
|
|
Model func: GaussianNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01437426 0.01120424 0.0100081 0.00968909 0.00983858 0.00988436
|
|
0.01077795 0.01006675 0.00971889 0.01089692]
|
|
|
|
mean value: 0.010645914077758788
|
|
|
|
key: score_time
|
|
value: [0.01239276 0.00954723 0.00908661 0.00897408 0.00899839 0.00888991
|
|
0.00891042 0.00963974 0.00894403 0.00965524]
|
|
|
|
mean value: 0.009503841400146484
|
|
|
|
key: test_mcc
|
|
value: [0.48712471 0.31590883 0.56769924 0.62096774 0.48712471 0.76032282
|
|
0.59764284 0.54667108 0.33208342 0.33864811]
|
|
|
|
mean value: 0.5054193489018697
|
|
|
|
key: train_mcc
|
|
value: [0.50746207 0.58785983 0.62257686 0.51310898 0.53815026 0.61847806
|
|
0.59992952 0.56870287 0.61596238 0.53164607]
|
|
|
|
mean value: 0.5703876902905627
|
|
|
|
key: test_accuracy
|
|
value: [0.76595745 0.65957447 0.80851064 0.82978723 0.76595745 0.89361702
|
|
0.80851064 0.7826087 0.69565217 0.67391304]
|
|
|
|
mean value: 0.76840888066605
|
|
|
|
key: train_accuracy
|
|
value: [0.78333333 0.80952381 0.82142857 0.77380952 0.78809524 0.81904762
|
|
0.81190476 0.79809976 0.81947743 0.75771971]
|
|
|
|
mean value: 0.7982439769256872
|
|
|
|
key: test_fscore
|
|
value: [0.81967213 0.71428571 0.85714286 0.87096774 0.81967213 0.92307692
|
|
0.84745763 0.82758621 0.76666667 0.72727273]
|
|
|
|
mean value: 0.817380072669065
|
|
|
|
key: train_fscore
|
|
value: [0.83950617 0.85185185 0.85875706 0.82309125 0.83609576 0.85660377
|
|
0.85178236 0.8411215 0.8576779 0.79518072]
|
|
|
|
mean value: 0.8411668357185847
|
|
|
|
key: test_precision
|
|
value: [0.83333333 0.8 0.84375 0.87096774 0.83333333 0.88235294
|
|
0.89285714 0.88888889 0.79310345 0.8 ]
|
|
|
|
mean value: 0.8438586829800515
|
|
|
|
key: train_precision
|
|
value: [0.82352941 0.8778626 0.90118577 0.85328185 0.85660377 0.90079365
|
|
0.89019608 0.87548638 0.89453125 0.90410959]
|
|
|
|
mean value: 0.8777580354391377
|
|
|
|
key: test_recall
|
|
value: [0.80645161 0.64516129 0.87096774 0.87096774 0.80645161 0.96774194
|
|
0.80645161 0.77419355 0.74193548 0.66666667]
|
|
|
|
mean value: 0.7956989247311828
|
|
|
|
key: train_recall
|
|
value: [0.85611511 0.82733813 0.82014388 0.79496403 0.81654676 0.81654676
|
|
0.81654676 0.80935252 0.82374101 0.70967742]
|
|
|
|
mean value: 0.8090972383383616
|
|
|
|
key: test_roc_auc
|
|
value: [0.74697581 0.66633065 0.77923387 0.81048387 0.74697581 0.85887097
|
|
0.80947581 0.78709677 0.67096774 0.67708333]
|
|
|
|
mean value: 0.7553494623655914
|
|
|
|
key: train_roc_auc
|
|
value: [0.74848009 0.80099301 0.82204377 0.7636792 0.77447056 0.82024521
|
|
0.80968183 0.79278815 0.81746491 0.78089505]
|
|
|
|
mean value: 0.793074178117275
|
|
|
|
key: test_jcc
|
|
value: [0.69444444 0.55555556 0.75 0.77142857 0.69444444 0.85714286
|
|
0.73529412 0.70588235 0.62162162 0.57142857]
|
|
|
|
mean value: 0.6957242536654301
|
|
|
|
key: train_jcc
|
|
value: [0.72340426 0.74193548 0.75247525 0.69936709 0.71835443 0.74917492
|
|
0.74183007 0.72580645 0.75081967 0.66 ]
|
|
|
|
mean value: 0.7263167612297488
|
|
|
|
MCC on Blind test: 0.17
|
|
|
|
Accuracy on Blind test: 0.49
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01024628 0.01106691 0.01009178 0.01026082 0.01036048 0.01026225
|
|
0.01013184 0.01017618 0.01020217 0.01028705]
|
|
|
|
mean value: 0.010308575630187989
|
|
|
|
key: score_time
|
|
value: [0.00911188 0.00916982 0.00888133 0.0091939 0.00980973 0.00894856
|
|
0.00905204 0.00923276 0.00895095 0.00896621]
|
|
|
|
mean value: 0.009131717681884765
|
|
|
|
key: test_mcc
|
|
value: [0.72715272 0.52620968 0.50611184 0.71025956 0.71206211 0.56329266
|
|
0.55956342 0.49033059 0.24538756 0.65669997]
|
|
|
|
mean value: 0.5697070127041126
|
|
|
|
key: train_mcc
|
|
value: [0.60428127 0.6506538 0.68534362 0.63499734 0.65670743 0.66210484
|
|
0.65614514 0.67599229 0.67555291 0.64020363]
|
|
|
|
mean value: 0.654198226500829
|
|
|
|
key: test_accuracy
|
|
value: [0.87234043 0.78723404 0.78723404 0.87234043 0.87234043 0.80851064
|
|
0.80851064 0.7826087 0.67391304 0.84782609]
|
|
|
|
mean value: 0.8112858464384829
|
|
|
|
key: train_accuracy
|
|
value: [0.82619048 0.84761905 0.86190476 0.84047619 0.85 0.85238095
|
|
0.85 0.85748219 0.85748219 0.8432304 ]
|
|
|
|
mean value: 0.8486766202918222
|
|
|
|
key: test_fscore
|
|
value: [0.9 0.83870968 0.84848485 0.90625 0.90909091 0.86956522
|
|
0.86153846 0.84375 0.76190476 0.88888889]
|
|
|
|
mean value: 0.8628182764718529
|
|
|
|
key: train_fscore
|
|
value: [0.87170475 0.88965517 0.8986014 0.88347826 0.89081456 0.89273356
|
|
0.89156627 0.8951049 0.89547038 0.8862069 ]
|
|
|
|
mean value: 0.8895336139116604
|
|
|
|
key: test_precision
|
|
value: [0.93103448 0.83870968 0.8 0.87878788 0.85714286 0.78947368
|
|
0.82352941 0.81818182 0.75 0.84848485]
|
|
|
|
mean value: 0.8335344658750611
|
|
|
|
key: train_precision
|
|
value: [0.85223368 0.85430464 0.87414966 0.85521886 0.85953177 0.86
|
|
0.85478548 0.8707483 0.86824324 0.8538206 ]
|
|
|
|
mean value: 0.8603036219513056
|
|
|
|
key: test_recall
|
|
value: [0.87096774 0.83870968 0.90322581 0.93548387 0.96774194 0.96774194
|
|
0.90322581 0.87096774 0.77419355 0.93333333]
|
|
|
|
mean value: 0.8965591397849463
|
|
|
|
key: train_recall
|
|
value: [0.89208633 0.92805755 0.92446043 0.91366906 0.92446043 0.92805755
|
|
0.93165468 0.92086331 0.92446043 0.92114695]
|
|
|
|
mean value: 0.920891673757768
|
|
|
|
key: test_roc_auc
|
|
value: [0.87298387 0.76310484 0.7328629 0.84274194 0.82762097 0.73387097
|
|
0.7641129 0.73548387 0.62043011 0.81041667]
|
|
|
|
mean value: 0.7703629032258065
|
|
|
|
key: train_roc_auc
|
|
value: [0.79463471 0.8090992 0.83194853 0.80542608 0.81434289 0.81614145
|
|
0.81089776 0.82756452 0.82586658 0.8056439 ]
|
|
|
|
mean value: 0.8141565627727084
|
|
|
|
key: test_jcc
|
|
value: [0.81818182 0.72222222 0.73684211 0.82857143 0.83333333 0.76923077
|
|
0.75675676 0.72972973 0.61538462 0.8 ]
|
|
|
|
mean value: 0.7610252778673832
|
|
|
|
key: train_jcc
|
|
value: [0.77258567 0.80124224 0.81587302 0.79127726 0.803125 0.80625
|
|
0.80434783 0.81012658 0.81072555 0.79566563]
|
|
|
|
mean value: 0.8011218775337603
|
|
|
|
MCC on Blind test: 0.24
|
|
|
|
Accuracy on Blind test: 0.5
|
|
|
|
Model_name: K-Nearest Neighbors
|
|
Model func: KNeighborsClassifier()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', KNeighborsClassifier())])
|
|
|
|
key: fit_time
|
|
value: [0.00970244 0.00998521 0.01048708 0.00994468 0.00962806 0.0105741
|
|
0.01067281 0.01052999 0.00967312 0.01057601]
|
|
|
|
mean value: 0.010177350044250489
|
|
|
|
key: score_time
|
|
value: [0.0793097 0.01351643 0.01241183 0.01221108 0.01160502 0.0174439
|
|
0.01262283 0.01519203 0.01503754 0.01247334]
|
|
|
|
mean value: 0.020182371139526367
|
|
|
|
key: test_mcc
|
|
value: [0.4512753 0.60908698 0.557325 0.60908698 0.76034808 0.17507316
|
|
0.50611184 0.47977675 0.35831956 0.39770584]
|
|
|
|
mean value: 0.4904109472286944
|
|
|
|
key: train_mcc
|
|
value: [0.65026131 0.59308253 0.62246377 0.59871999 0.57608635 0.62868128
|
|
0.60446554 0.62393453 0.67621075 0.65060935]
|
|
|
|
mean value: 0.6224515401625214
|
|
|
|
key: test_accuracy
|
|
value: [0.76595745 0.82978723 0.80851064 0.82978723 0.89361702 0.65957447
|
|
0.78723404 0.7826087 0.73913043 0.73913043]
|
|
|
|
mean value: 0.7835337650323774
|
|
|
|
key: train_accuracy
|
|
value: [0.84761905 0.82380952 0.83571429 0.82619048 0.81666667 0.83809524
|
|
0.82857143 0.83610451 0.85748219 0.847981 ]
|
|
|
|
mean value: 0.8358234362628661
|
|
|
|
key: test_fscore
|
|
value: [0.8358209 0.87878788 0.86567164 0.87878788 0.92063492 0.76470588
|
|
0.84848485 0.84848485 0.82352941 0.8125 ]
|
|
|
|
mean value: 0.8477408206611455
|
|
|
|
key: train_fscore
|
|
value: [0.89078498 0.87414966 0.88123924 0.87606112 0.86882453 0.88235294
|
|
0.87878788 0.8836425 0.8989899 0.89115646]
|
|
|
|
mean value: 0.8825989214867034
|
|
|
|
key: test_precision
|
|
value: [0.77777778 0.82857143 0.80555556 0.82857143 0.90625 0.7027027
|
|
0.8 0.8 0.75675676 0.76470588]
|
|
|
|
mean value: 0.7970891532288591
|
|
|
|
key: train_precision
|
|
value: [0.8474026 0.82903226 0.84488449 0.82958199 0.82524272 0.85
|
|
0.82594937 0.83174603 0.84493671 0.84789644]
|
|
|
|
mean value: 0.8376672603756541
|
|
|
|
key: test_recall
|
|
value: [0.90322581 0.93548387 0.93548387 0.93548387 0.93548387 0.83870968
|
|
0.90322581 0.90322581 0.90322581 0.86666667]
|
|
|
|
mean value: 0.9060215053763441
|
|
|
|
key: train_recall
|
|
value: [0.93884892 0.92446043 0.92086331 0.92805755 0.91726619 0.91726619
|
|
0.93884892 0.94244604 0.96043165 0.9390681 ]
|
|
|
|
mean value: 0.9327557308991516
|
|
|
|
key: test_roc_auc
|
|
value: [0.7016129 0.78024194 0.74899194 0.78024194 0.87399194 0.57560484
|
|
0.7328629 0.71827957 0.6516129 0.68333333]
|
|
|
|
mean value: 0.7246774193548388
|
|
|
|
key: train_roc_auc
|
|
value: [0.8039315 0.7756105 0.7949387 0.77740906 0.76849225 0.80018239
|
|
0.77576249 0.78590834 0.80888716 0.80404109]
|
|
|
|
mean value: 0.7895163466866486
|
|
|
|
key: test_jcc
|
|
value: [0.71794872 0.78378378 0.76315789 0.78378378 0.85294118 0.61904762
|
|
0.73684211 0.73684211 0.7 0.68421053]
|
|
|
|
mean value: 0.737855771261344
|
|
|
|
key: train_jcc
|
|
value: [0.80307692 0.77643505 0.78769231 0.77945619 0.76807229 0.78947368
|
|
0.78378378 0.79154079 0.81651376 0.80368098]
|
|
|
|
mean value: 0.7899725755152334
|
|
|
|
MCC on Blind test: 0.16
|
|
|
|
Accuracy on Blind test: 0.39
|
|
|
|
Model_name: SVM
|
|
Model func: SVC(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SVC(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02232766 0.01876426 0.02162337 0.01979256 0.02090979 0.0185473
|
|
0.01879716 0.01888871 0.01875162 0.01901221]
|
|
|
|
mean value: 0.019741463661193847
|
|
|
|
key: score_time
|
|
value: [0.01124477 0.01115179 0.01221538 0.0110321 0.01119208 0.01118159
|
|
0.01107144 0.01110148 0.01091552 0.01118755]
|
|
|
|
mean value: 0.011229372024536133
|
|
|
|
key: test_mcc
|
|
value: [0.86070252 0.72363572 0.557325 0.71206211 0.66337469 0.6139232
|
|
0.61207663 0.59332241 0.30795894 0.72168784]
|
|
|
|
mean value: 0.6366069048812995
|
|
|
|
key: train_mcc
|
|
value: [0.69610881 0.69057002 0.70716866 0.69159168 0.69057002 0.73588387
|
|
0.70257433 0.67685276 0.72049649 0.69640412]
|
|
|
|
mean value: 0.7008220750346401
|
|
|
|
key: test_accuracy
|
|
value: [0.93617021 0.87234043 0.80851064 0.87234043 0.85106383 0.82978723
|
|
0.82978723 0.82608696 0.7173913 0.86956522]
|
|
|
|
mean value: 0.841304347826087
|
|
|
|
key: train_accuracy
|
|
value: [0.86666667 0.86428571 0.87142857 0.86428571 0.86428571 0.88333333
|
|
0.86904762 0.85748219 0.87648456 0.86698337]
|
|
|
|
mean value: 0.8684283452098179
|
|
|
|
key: test_fscore
|
|
value: [0.95384615 0.91176471 0.86567164 0.90909091 0.89552239 0.88235294
|
|
0.875 0.875 0.80597015 0.90909091]
|
|
|
|
mean value: 0.8883309798191273
|
|
|
|
key: train_fscore
|
|
value: [0.9047619 0.90322581 0.90784983 0.90387858 0.90322581 0.9165247
|
|
0.90693739 0.89932886 0.91156463 0.90508475]
|
|
|
|
mean value: 0.9062382257284957
|
|
|
|
key: test_precision
|
|
value: [0.91176471 0.83783784 0.80555556 0.85714286 0.83333333 0.81081081
|
|
0.84848485 0.84848485 0.75 0.83333333]
|
|
|
|
mean value: 0.8336748130865778
|
|
|
|
key: train_precision
|
|
value: [0.85806452 0.85530547 0.86363636 0.85079365 0.85530547 0.87055016
|
|
0.85623003 0.8427673 0.86451613 0.8585209 ]
|
|
|
|
mean value: 0.8575689981747396
|
|
|
|
key: test_recall
|
|
value: [1. 1. 0.93548387 0.96774194 0.96774194 0.96774194
|
|
0.90322581 0.90322581 0.87096774 1. ]
|
|
|
|
mean value: 0.9516129032258065
|
|
|
|
key: train_recall
|
|
value: [0.95683453 0.95683453 0.95683453 0.96402878 0.95683453 0.9676259
|
|
0.96402878 0.96402878 0.96402878 0.95698925]
|
|
|
|
mean value: 0.9608068384002476
|
|
|
|
key: test_roc_auc
|
|
value: [0.90625 0.8125 0.74899194 0.82762097 0.79637097 0.76512097
|
|
0.7953629 0.78494624 0.63548387 0.8125 ]
|
|
|
|
mean value: 0.7885147849462366
|
|
|
|
key: train_roc_auc
|
|
value: [0.82348769 0.81996656 0.83052994 0.81652143 0.81996656 0.84296788
|
|
0.82356368 0.80718921 0.83516124 0.82356505]
|
|
|
|
mean value: 0.8242919250604606
|
|
|
|
key: test_jcc
|
|
value: [0.91176471 0.83783784 0.76315789 0.83333333 0.81081081 0.78947368
|
|
0.77777778 0.77777778 0.675 0.83333333]
|
|
|
|
mean value: 0.8010267155700592
|
|
|
|
key: train_jcc
|
|
value: [0.82608696 0.82352941 0.83125 0.82461538 0.82352941 0.84591195
|
|
0.82972136 0.81707317 0.8375 0.82662539]
|
|
|
|
mean value: 0.8285843034309783
|
|
|
|
MCC on Blind test: 0.23
|
|
|
|
Accuracy on Blind test: 0.42
|
|
|
|
Model_name: MLP
|
|
Model func: MLPClassifier(max_iter=500, random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MLPClassifier(max_iter=500, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.41509366 1.89070988 0.35135531 1.76246381 1.76783442 1.66246915
|
|
1.52473354 0.69678211 0.60859632 1.48020411]
|
|
|
|
mean value: 1.2160242319107055
|
|
|
|
key: score_time
|
|
value: [0.01236773 0.01505613 0.01239443 0.01246619 0.02640224 0.01245737
|
|
0.01242447 0.01241732 0.01241112 0.01234865]
|
|
|
|
mean value: 0.014074563980102539
|
|
|
|
key: test_mcc
|
|
value: [0.90662544 0.67402153 0.55956342 0.95299692 0.95299692 0.66337469
|
|
0.71572581 0.75776742 0.44695591 0.61666667]
|
|
|
|
mean value: 0.7246694736926886
|
|
|
|
key: train_mcc
|
|
value: [0.75124204 0.98408226 0.78027884 0.95249586 0.9680267 0.95736701
|
|
0.90500503 0.83068165 0.81289932 0.95227009]
|
|
|
|
mean value: 0.8894348800248977
|
|
|
|
key: test_accuracy
|
|
value: [0.95744681 0.85106383 0.80851064 0.9787234 0.9787234 0.85106383
|
|
0.87234043 0.89130435 0.76086957 0.82608696]
|
|
|
|
mean value: 0.877613320999075
|
|
|
|
key: train_accuracy
|
|
value: [0.89047619 0.99285714 0.90238095 0.97857143 0.98571429 0.98095238
|
|
0.95714286 0.9239905 0.91686461 0.97862233]
|
|
|
|
mean value: 0.9507572672774574
|
|
|
|
key: test_fscore
|
|
value: [0.96875 0.8852459 0.86153846 0.98412698 0.98412698 0.89552239
|
|
0.90322581 0.91803279 0.82539683 0.86666667]
|
|
|
|
mean value: 0.9092632804891827
|
|
|
|
key: train_fscore
|
|
value: [0.91958042 0.99459459 0.92691622 0.9840708 0.98924731 0.98571429
|
|
0.96853147 0.94482759 0.93913043 0.9840708 ]
|
|
|
|
mean value: 0.9636683915192453
|
|
|
|
key: test_precision
|
|
value: [0.93939394 0.9 0.82352941 0.96875 0.96875 0.83333333
|
|
0.90322581 0.93333333 0.8125 0.86666667]
|
|
|
|
mean value: 0.8949482490943592
|
|
|
|
key: train_precision
|
|
value: [0.89455782 0.99638989 0.91872792 0.96864111 0.98571429 0.9787234
|
|
0.94217687 0.90728477 0.90909091 0.97202797]
|
|
|
|
mean value: 0.9473334955051633
|
|
|
|
key: test_recall
|
|
value: [1. 0.87096774 0.90322581 1. 1. 0.96774194
|
|
0.90322581 0.90322581 0.83870968 0.86666667]
|
|
|
|
mean value: 0.9253763440860215
|
|
|
|
key: train_recall
|
|
value: [0.94604317 0.99280576 0.9352518 1. 0.99280576 0.99280576
|
|
0.99640288 0.98561151 0.97122302 0.99641577]
|
|
|
|
mean value: 0.9809365410897088
|
|
|
|
key: test_roc_auc
|
|
value: [0.9375 0.84173387 0.7641129 0.96875 0.96875 0.79637097
|
|
0.8578629 0.88494624 0.71935484 0.80833333]
|
|
|
|
mean value: 0.854771505376344
|
|
|
|
key: train_roc_auc
|
|
value: [0.86386665 0.99288175 0.88663998 0.96830986 0.98231837 0.97527612
|
|
0.93834228 0.89490366 0.89120592 0.97003887]
|
|
|
|
mean value: 0.9363783463845078
|
|
|
|
key: test_jcc
|
|
value: [0.93939394 0.79411765 0.75675676 0.96875 0.96875 0.81081081
|
|
0.82352941 0.84848485 0.7027027 0.76470588]
|
|
|
|
mean value: 0.8378001999325528
|
|
|
|
key: train_jcc
|
|
value: [0.85113269 0.98924731 0.86378738 0.96864111 0.9787234 0.97183099
|
|
0.93898305 0.89542484 0.8852459 0.96864111]
|
|
|
|
mean value: 0.931165778255146
|
|
|
|
MCC on Blind test: 0.13
|
|
|
|
Accuracy on Blind test: 0.37
|
|
|
|
Model_name: Decision Tree
|
|
Model func: DecisionTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', DecisionTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02629733 0.02072287 0.01916671 0.02101421 0.01953888 0.02145672
|
|
0.02063489 0.02003598 0.02033472 0.01894474]
|
|
|
|
mean value: 0.02081470489501953
|
|
|
|
key: score_time
|
|
value: [0.01246381 0.00959682 0.00929546 0.00943065 0.00951147 0.00888705
|
|
0.00958252 0.0094862 0.00885487 0.00886083]
|
|
|
|
mean value: 0.009596967697143554
|
|
|
|
key: test_mcc
|
|
value: [0.90524194 0.81048387 1. 0.91188882 0.90662544 0.76032282
|
|
1. 0.86757603 0.90107527 0.80651412]
|
|
|
|
mean value: 0.8869728313481567
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.95744681 0.91489362 1. 0.95744681 0.95744681 0.89361702
|
|
1. 0.93478261 0.95652174 0.91304348]
|
|
|
|
mean value: 0.9485198889916744
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.96774194 0.93548387 1. 0.96666667 0.96875 0.92307692
|
|
1. 0.94915254 0.96774194 0.93548387]
|
|
|
|
mean value: 0.9614097745019696
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.96774194 0.93548387 1. 1. 0.93939394 0.88235294
|
|
1. 1. 0.96774194 0.90625 ]
|
|
|
|
mean value: 0.9598964622505894
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.96774194 0.93548387 1. 0.93548387 1. 0.96774194
|
|
1. 0.90322581 0.96774194 0.96666667]
|
|
|
|
mean value: 0.9644086021505376
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.95262097 0.90524194 1. 0.96774194 0.9375 0.85887097
|
|
1. 0.9516129 0.95053763 0.88958333]
|
|
|
|
mean value: 0.9413709677419355
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.9375 0.87878788 1. 0.93548387 0.93939394 0.85714286
|
|
1. 0.90322581 0.9375 0.87878788]
|
|
|
|
mean value: 0.9267822231531909
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.08
|
|
|
|
Accuracy on Blind test: 0.2
|
|
|
|
Model_name: Extra Trees
|
|
Model func: ExtraTreesClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreesClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.11682916 0.11507559 0.12848043 0.12166643 0.11733985 0.11582923
|
|
0.11812663 0.117419 0.11527824 0.11577892]
|
|
|
|
mean value: 0.11818234920501709
|
|
|
|
key: score_time
|
|
value: [0.01781154 0.01825404 0.01940131 0.01899099 0.01792264 0.01776505
|
|
0.01769257 0.01761508 0.01792526 0.01881552]
|
|
|
|
mean value: 0.018219399452209472
|
|
|
|
key: test_mcc
|
|
value: [0.90662544 0.76034808 0.76942439 0.81048387 0.76942439 0.50421069
|
|
0.66402366 0.79930604 0.59332241 0.7073172 ]
|
|
|
|
mean value: 0.7284486166045462
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.95744681 0.89361702 0.89361702 0.91489362 0.89361702 0.78723404
|
|
0.85106383 0.91304348 0.82608696 0.86956522]
|
|
|
|
mean value: 0.8800185013876041
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.96875 0.92063492 0.92537313 0.93548387 0.92537313 0.85294118
|
|
0.88888889 0.9375 0.875 0.90322581]
|
|
|
|
mean value: 0.9133170932070469
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.93939394 0.90625 0.86111111 0.93548387 0.86111111 0.78378378
|
|
0.875 0.90909091 0.84848485 0.875 ]
|
|
|
|
mean value: 0.8794709573943444
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 0.93548387 1. 0.93548387 1. 0.93548387
|
|
0.90322581 0.96774194 0.90322581 0.93333333]
|
|
|
|
mean value: 0.9513978494623656
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.9375 0.87399194 0.84375 0.90524194 0.84375 0.71774194
|
|
0.8266129 0.88387097 0.78494624 0.84166667]
|
|
|
|
mean value: 0.8459072580645162
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.93939394 0.85294118 0.86111111 0.87878788 0.86111111 0.74358974
|
|
0.8 0.88235294 0.77777778 0.82352941]
|
|
|
|
mean value: 0.8420595091183327
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.17
|
|
|
|
Accuracy on Blind test: 0.35
|
|
|
|
Model_name: Extra Tree
|
|
Model func: ExtraTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01028204 0.01027584 0.01097631 0.01002836 0.01014161 0.01016474
|
|
0.00999165 0.01011252 0.01021099 0.01017547]
|
|
|
|
mean value: 0.010235953330993652
|
|
|
|
key: score_time
|
|
value: [0.00873184 0.00867677 0.00888753 0.00895524 0.0091784 0.00871277
|
|
0.00871825 0.0088079 0.00915551 0.00878716]
|
|
|
|
mean value: 0.008861136436462403
|
|
|
|
key: test_mcc
|
|
value: [0.47146788 0.66337469 0.30022788 0.48712471 0.50421069 0.13312621
|
|
0.43145161 0.47977675 0.23600897 0.61666667]
|
|
|
|
mean value: 0.43234360607693584
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.74468085 0.85106383 0.68085106 0.76595745 0.78723404 0.63829787
|
|
0.74468085 0.7826087 0.65217391 0.82608696]
|
|
|
|
mean value: 0.7473635522664199
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.79310345 0.89552239 0.75409836 0.81967213 0.85294118 0.74626866
|
|
0.80645161 0.84848485 0.73333333 0.86666667]
|
|
|
|
mean value: 0.8116542622713923
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.85185185 0.83333333 0.76666667 0.83333333 0.78378378 0.69444444
|
|
0.80645161 0.8 0.75862069 0.86666667]
|
|
|
|
mean value: 0.7995152382638478
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.74193548 0.96774194 0.74193548 0.80645161 0.93548387 0.80645161
|
|
0.80645161 0.90322581 0.70967742 0.86666667]
|
|
|
|
mean value: 0.8286021505376344
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.74596774 0.79637097 0.65221774 0.74697581 0.71774194 0.55947581
|
|
0.71572581 0.71827957 0.62150538 0.80833333]
|
|
|
|
mean value: 0.7082594086021505
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.65714286 0.81081081 0.60526316 0.69444444 0.74358974 0.5952381
|
|
0.67567568 0.73684211 0.57894737 0.76470588]
|
|
|
|
mean value: 0.6862660140833515
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.21
|
|
|
|
Accuracy on Blind test: 0.5
|
|
|
|
Model_name: Random Forest
|
|
Model func: RandomForestClassifier(n_estimators=1000, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(n_estimators=1000, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.78322387 1.70288587 1.75905633 1.67823339 1.76473522 1.75426316
|
|
1.67606902 1.68830752 1.72714567 1.77702308]
|
|
|
|
mean value: 1.7310943126678466
|
|
|
|
key: score_time
|
|
value: [0.0979917 0.09790587 0.09109807 0.09100127 0.09731817 0.0949614
|
|
0.09058595 0.09162045 0.09331584 0.09899282]
|
|
|
|
mean value: 0.0944791555404663
|
|
|
|
key: test_mcc
|
|
value: [0.95299692 0.86070252 0.90662544 0.95436677 0.90662544 0.81503725
|
|
0.91188882 0.90107527 0.90229785 0.80651412]
|
|
|
|
mean value: 0.8918130408374696
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.9787234 0.93617021 0.95744681 0.9787234 0.95744681 0.91489362
|
|
0.95744681 0.95652174 0.95652174 0.91304348]
|
|
|
|
mean value: 0.9506938020351526
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.98412698 0.95384615 0.96875 0.98360656 0.96875 0.93939394
|
|
0.96666667 0.96774194 0.96875 0.93548387]
|
|
|
|
mean value: 0.9637116107862406
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.96875 0.91176471 0.93939394 1. 0.93939394 0.88571429
|
|
1. 0.96774194 0.93939394 0.90625 ]
|
|
|
|
mean value: 0.9458402745262328
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 0.96774194 1. 1.
|
|
0.93548387 0.96774194 1. 0.96666667]
|
|
|
|
mean value: 0.983763440860215
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.96875 0.90625 0.9375 0.98387097 0.9375 0.875
|
|
0.96774194 0.95053763 0.93333333 0.88958333]
|
|
|
|
mean value: 0.9350067204301076
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.96875 0.91176471 0.93939394 0.96774194 0.93939394 0.88571429
|
|
0.93548387 0.9375 0.93939394 0.87878788]
|
|
|
|
mean value: 0.9303924495017949
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.09
|
|
|
|
Accuracy on Blind test: 0.21
|
|
|
|
Model_name: Random Forest2
|
|
Model func: RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000...05', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10,
|
|
oob_score=True, random_state=42))])
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
|
|
key: fit_time
|
|
value: [1.85024405 1.00775695 1.08623052 1.03636193 0.93481803 1.00752449
|
|
0.95746088 0.99271202 0.97024059 1.0156703 ]
|
|
|
|
mean value: 1.0859019756317139
|
|
|
|
key: score_time
|
|
value: [0.22596669 0.28282785 0.23712778 0.22467446 0.17264795 0.14795899
|
|
0.25987649 0.2267096 0.27685618 0.23671722]
|
|
|
|
mean value: 0.229136323928833
|
|
|
|
key: test_mcc
|
|
value: [0.95299692 0.86070252 0.90662544 0.95299692 0.90662544 0.76942439
|
|
0.86091836 0.85009261 0.8059304 0.75806977]
|
|
|
|
mean value: 0.8624382772371741
|
|
|
|
key: train_mcc
|
|
value: [0.93680867 0.95249586 0.94725945 0.94725945 0.95249586 0.95249586
|
|
0.94725945 0.94751034 0.94751034 0.96303439]
|
|
|
|
mean value: 0.9494129679989223
|
|
|
|
key: test_accuracy
|
|
value: [0.9787234 0.93617021 0.95744681 0.9787234 0.95744681 0.89361702
|
|
0.93617021 0.93478261 0.91304348 0.89130435]
|
|
|
|
mean value: 0.9377428307123035
|
|
|
|
key: train_accuracy
|
|
value: [0.97142857 0.97857143 0.97619048 0.97619048 0.97857143 0.97857143
|
|
0.97619048 0.97624703 0.97624703 0.98337292]
|
|
|
|
mean value: 0.9771581269087207
|
|
|
|
key: test_fscore
|
|
value: [0.98412698 0.95384615 0.96875 0.98412698 0.96875 0.92537313
|
|
0.95081967 0.95238095 0.93939394 0.92063492]
|
|
|
|
mean value: 0.954820274096944
|
|
|
|
key: train_fscore
|
|
value: [0.97887324 0.9840708 0.98233216 0.98233216 0.9840708 0.9840708
|
|
0.98233216 0.98233216 0.98233216 0.98761062]
|
|
|
|
mean value: 0.9830357025671337
|
|
|
|
key: test_precision
|
|
value: [0.96875 0.91176471 0.93939394 0.96875 0.93939394 0.86111111
|
|
0.96666667 0.9375 0.88571429 0.87878788]
|
|
|
|
mean value: 0.9257832526950174
|
|
|
|
key: train_precision
|
|
value: [0.95862069 0.96864111 0.96527778 0.96527778 0.96864111 0.96864111
|
|
0.96527778 0.96527778 0.96527778 0.97552448]
|
|
|
|
mean value: 0.9666457399016273
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 1. 1.
|
|
0.93548387 0.96774194 1. 0.96666667]
|
|
|
|
mean value: 0.9869892473118279
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.96875 0.90625 0.9375 0.96875 0.9375 0.84375
|
|
0.93649194 0.9172043 0.86666667 0.85833333]
|
|
|
|
mean value: 0.9141196236559139
|
|
|
|
key: train_roc_auc
|
|
value: [0.95774648 0.96830986 0.96478873 0.96478873 0.96830986 0.96830986
|
|
0.96478873 0.96503497 0.96503497 0.97535211]
|
|
|
|
mean value: 0.9662464296267113
|
|
|
|
key: test_jcc
|
|
value: [0.96875 0.91176471 0.93939394 0.96875 0.93939394 0.86111111
|
|
0.90625 0.90909091 0.88571429 0.85294118]
|
|
|
|
mean value: 0.9143160067057126
|
|
|
|
key: train_jcc
|
|
value: [0.95862069 0.96864111 0.96527778 0.96527778 0.96864111 0.96864111
|
|
0.96527778 0.96527778 0.96527778 0.97552448]
|
|
|
|
mean value: 0.9666457399016273
|
|
|
|
MCC on Blind test: 0.1
|
|
|
|
Accuracy on Blind test: 0.22
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.02664924 0.01019883 0.010216 0.01071167 0.01019812 0.01019883
|
|
0.01137066 0.0105741 0.01056242 0.01117802]
|
|
|
|
mean value: 0.012185788154602051
|
|
|
|
key: score_time
|
|
value: [0.01141644 0.00912237 0.00920796 0.00904679 0.00896335 0.00902796
|
|
0.00981045 0.00913143 0.00937605 0.01008081]
|
|
|
|
mean value: 0.00951836109161377
|
|
|
|
key: test_mcc
|
|
value: [0.72715272 0.52620968 0.50611184 0.71025956 0.71206211 0.56329266
|
|
0.55956342 0.49033059 0.24538756 0.65669997]
|
|
|
|
mean value: 0.5697070127041126
|
|
|
|
key: train_mcc
|
|
value: [0.60428127 0.6506538 0.68534362 0.63499734 0.65670743 0.66210484
|
|
0.65614514 0.67599229 0.67555291 0.64020363]
|
|
|
|
mean value: 0.654198226500829
|
|
|
|
key: test_accuracy
|
|
value: [0.87234043 0.78723404 0.78723404 0.87234043 0.87234043 0.80851064
|
|
0.80851064 0.7826087 0.67391304 0.84782609]
|
|
|
|
mean value: 0.8112858464384829
|
|
|
|
key: train_accuracy
|
|
value: [0.82619048 0.84761905 0.86190476 0.84047619 0.85 0.85238095
|
|
0.85 0.85748219 0.85748219 0.8432304 ]
|
|
|
|
mean value: 0.8486766202918222
|
|
|
|
key: test_fscore
|
|
value: [0.9 0.83870968 0.84848485 0.90625 0.90909091 0.86956522
|
|
0.86153846 0.84375 0.76190476 0.88888889]
|
|
|
|
mean value: 0.8628182764718529
|
|
|
|
key: train_fscore
|
|
value: [0.87170475 0.88965517 0.8986014 0.88347826 0.89081456 0.89273356
|
|
0.89156627 0.8951049 0.89547038 0.8862069 ]
|
|
|
|
mean value: 0.8895336139116604
|
|
|
|
key: test_precision
|
|
value: [0.93103448 0.83870968 0.8 0.87878788 0.85714286 0.78947368
|
|
0.82352941 0.81818182 0.75 0.84848485]
|
|
|
|
mean value: 0.8335344658750611
|
|
|
|
key: train_precision
|
|
value: [0.85223368 0.85430464 0.87414966 0.85521886 0.85953177 0.86
|
|
0.85478548 0.8707483 0.86824324 0.8538206 ]
|
|
|
|
mean value: 0.8603036219513056
|
|
|
|
key: test_recall
|
|
value: [0.87096774 0.83870968 0.90322581 0.93548387 0.96774194 0.96774194
|
|
0.90322581 0.87096774 0.77419355 0.93333333]
|
|
|
|
mean value: 0.8965591397849463
|
|
|
|
key: train_recall
|
|
value: [0.89208633 0.92805755 0.92446043 0.91366906 0.92446043 0.92805755
|
|
0.93165468 0.92086331 0.92446043 0.92114695]
|
|
|
|
mean value: 0.920891673757768
|
|
|
|
key: test_roc_auc
|
|
value: [0.87298387 0.76310484 0.7328629 0.84274194 0.82762097 0.73387097
|
|
0.7641129 0.73548387 0.62043011 0.81041667]
|
|
|
|
mean value: 0.7703629032258065
|
|
|
|
key: train_roc_auc
|
|
value: [0.79463471 0.8090992 0.83194853 0.80542608 0.81434289 0.81614145
|
|
0.81089776 0.82756452 0.82586658 0.8056439 ]
|
|
|
|
mean value: 0.8141565627727084
|
|
|
|
key: test_jcc
|
|
value: [0.81818182 0.72222222 0.73684211 0.82857143 0.83333333 0.76923077
|
|
0.75675676 0.72972973 0.61538462 0.8 ]
|
|
|
|
mean value: 0.7610252778673832
|
|
|
|
key: train_jcc
|
|
value: [0.77258567 0.80124224 0.81587302 0.79127726 0.803125 0.80625
|
|
0.80434783 0.81012658 0.81072555 0.79566563]
|
|
|
|
mean value: 0.8011218775337603
|
|
|
|
MCC on Blind test: 0.24
|
|
|
|
Accuracy on Blind test: 0.5
|
|
|
|
Model_name: XGBoost
|
|
Model func: XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000...
|
|
interaction_constraints=None, learning_rate=None,
|
|
max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan,
|
|
monotone_constraints=None, n_estimators=100,
|
|
n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None,
|
|
reg_lambda=None, scale_pos_weight=None,
|
|
subsample=None, tree_method=None,
|
|
use_label_encoder=False,
|
|
validate_parameters=None, verbosity=0))])
|
|
|
|
key: fit_time
|
|
value: [0.1078825 0.06452894 0.10497737 0.06652641 0.06726933 0.06478691
|
|
0.07195568 0.22934031 0.06012368 0.06932712]
|
|
|
|
mean value: 0.09067182540893555
|
|
|
|
key: score_time
|
|
value: [0.01200962 0.01149297 0.01164556 0.0111506 0.01142526 0.01123977
|
|
0.01101708 0.01149511 0.01077366 0.01080608]
|
|
|
|
mean value: 0.011305570602416992
|
|
|
|
key: test_mcc
|
|
value: [0.95299692 0.8566725 1. 1. 0.90662544 0.8084425
|
|
1. 0.9085301 0.95087679 0.80833333]
|
|
|
|
mean value: 0.9192477588152645
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.9787234 0.93617021 1. 1. 0.95744681 0.91489362
|
|
1. 0.95652174 0.97826087 0.91304348]
|
|
|
|
mean value: 0.9635060129509714
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.98412698 0.95238095 1. 1. 0.96875 0.9375
|
|
1. 0.96666667 0.98412698 0.93333333]
|
|
|
|
mean value: 0.9726884920634921
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.96875 0.9375 1. 1. 0.93939394 0.90909091
|
|
1. 1. 0.96875 0.93333333]
|
|
|
|
mean value: 0.9656818181818182
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 0.96774194 1. 1. 1. 0.96774194
|
|
1. 0.93548387 1. 0.93333333]
|
|
|
|
mean value: 0.9804301075268818
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.96875 0.92137097 1. 1. 0.9375 0.89012097
|
|
1. 0.96774194 0.96666667 0.90416667]
|
|
|
|
mean value: 0.9556317204301076
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.96875 0.90909091 1. 1. 0.93939394 0.88235294
|
|
1. 0.93548387 0.96875 0.875 ]
|
|
|
|
mean value: 0.9478821660629061
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.05
|
|
|
|
Accuracy on Blind test: 0.19
|
|
|
|
Model_name: LDA
|
|
Model func: LinearDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LinearDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.04805613 0.04128599 0.06816578 0.05686855 0.07966805 0.05619812
|
|
0.08012176 0.05640578 0.06545734 0.05767536]
|
|
|
|
mean value: 0.06099028587341308
|
|
|
|
key: score_time
|
|
value: [0.01277566 0.01246333 0.01240373 0.03590488 0.02149439 0.01272893
|
|
0.02091289 0.01216292 0.02337241 0.01243329]
|
|
|
|
mean value: 0.01766524314880371
|
|
|
|
key: test_mcc
|
|
value: [0.81952077 0.71025956 1. 0.91188882 0.90524194 0.66337469
|
|
0.81048387 0.85513419 0.70322581 0.67015231]
|
|
|
|
mean value: 0.8049281962952315
|
|
|
|
key: train_mcc
|
|
value: [0.95736701 0.96269263 0.95736701 0.95199661 0.95199661 0.95736701
|
|
0.95736701 0.96823254 0.95756757 0.96812026]
|
|
|
|
mean value: 0.9590074282899193
|
|
|
|
key: test_accuracy
|
|
value: [0.91489362 0.87234043 1. 0.95744681 0.95744681 0.85106383
|
|
0.91489362 0.93478261 0.86956522 0.84782609]
|
|
|
|
mean value: 0.9120259019426458
|
|
|
|
key: train_accuracy
|
|
value: [0.98095238 0.98333333 0.98095238 0.97857143 0.97857143 0.98095238
|
|
0.98095238 0.98574822 0.98099762 0.98574822]
|
|
|
|
mean value: 0.9816779776043434
|
|
|
|
key: test_fscore
|
|
value: [0.93333333 0.90625 1. 0.96666667 0.96774194 0.89552239
|
|
0.93548387 0.95081967 0.90322581 0.88135593]
|
|
|
|
mean value: 0.9340399605297465
|
|
|
|
key: train_fscore
|
|
value: [0.98571429 0.98747764 0.98571429 0.98389982 0.98389982 0.98571429
|
|
0.98571429 0.98928571 0.98571429 0.98932384]
|
|
|
|
mean value: 0.9862458267132189
|
|
|
|
key: test_precision
|
|
value: [0.96551724 0.87878788 1. 1. 0.96774194 0.83333333
|
|
0.93548387 0.96666667 0.90322581 0.89655172]
|
|
|
|
mean value: 0.9347308457208346
|
|
|
|
key: train_precision
|
|
value: [0.9787234 0.98220641 0.9787234 0.97864769 0.97864769 0.9787234
|
|
0.9787234 0.9822695 0.9787234 0.98233216]
|
|
|
|
mean value: 0.9797720459659157
|
|
|
|
key: test_recall
|
|
value: [0.90322581 0.93548387 1. 0.93548387 0.96774194 0.96774194
|
|
0.93548387 0.93548387 0.90322581 0.86666667]
|
|
|
|
mean value: 0.9350537634408602
|
|
|
|
key: train_recall
|
|
value: [0.99280576 0.99280576 0.99280576 0.98920863 0.98920863 0.99280576
|
|
0.99280576 0.99640288 0.99280576 0.99641577]
|
|
|
|
mean value: 0.9928070446868312
|
|
|
|
key: test_roc_auc
|
|
value: [0.9203629 0.84274194 1. 0.96774194 0.95262097 0.79637097
|
|
0.90524194 0.9344086 0.8516129 0.83958333]
|
|
|
|
mean value: 0.9010685483870968
|
|
|
|
key: train_roc_auc
|
|
value: [0.97527612 0.97879724 0.97527612 0.97347756 0.97347756 0.97527612
|
|
0.97527612 0.98071892 0.97542386 0.98060225]
|
|
|
|
mean value: 0.9763601853986702
|
|
|
|
key: test_jcc
|
|
value: [0.875 0.82857143 1. 0.93548387 0.9375 0.81081081
|
|
0.87878788 0.90625 0.82352941 0.78787879]
|
|
|
|
mean value: 0.8783812188781354
|
|
|
|
key: train_jcc
|
|
value: [0.97183099 0.97526502 0.97183099 0.96830986 0.96830986 0.97183099
|
|
0.97183099 0.97879859 0.97183099 0.97887324]
|
|
|
|
mean value: 0.9728711491564227
|
|
|
|
MCC on Blind test: 0.13
|
|
|
|
Accuracy on Blind test: 0.37
|
|
|
|
Model_name: Multinomial
|
|
Model func: MultinomialNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MultinomialNB())])
|
|
|
|
key: fit_time
|
|
value: [0.02241898 0.01315236 0.01071215 0.01068687 0.00957108 0.00949717
|
|
0.00947452 0.00976682 0.00959682 0.00956392]
|
|
|
|
mean value: 0.01144406795501709
|
|
|
|
key: score_time
|
|
value: [0.01186442 0.01024771 0.00938439 0.00939918 0.00869703 0.0087316
|
|
0.00866318 0.00860071 0.00864935 0.00871134]
|
|
|
|
mean value: 0.009294891357421875
|
|
|
|
key: test_mcc
|
|
value: [0.63478467 0.56769924 0.51389369 0.65994312 0.62096774 0.66337469
|
|
0.47137482 0.64852426 0.44695591 0.76764947]
|
|
|
|
mean value: 0.5995167617624015
|
|
|
|
key: train_mcc
|
|
value: [0.59337085 0.6486968 0.6639652 0.6261021 0.62393742 0.6523944
|
|
0.65818223 0.62211627 0.6562151 0.62652246]
|
|
|
|
mean value: 0.6371502844408112
|
|
|
|
key: test_accuracy
|
|
value: [0.82978723 0.80851064 0.78723404 0.85106383 0.82978723 0.85106383
|
|
0.76595745 0.84782609 0.76086957 0.89130435]
|
|
|
|
mean value: 0.8223404255319149
|
|
|
|
key: train_accuracy
|
|
value: [0.82142857 0.8452381 0.85238095 0.83571429 0.83571429 0.84761905
|
|
0.85 0.83372922 0.847981 0.83610451]
|
|
|
|
mean value: 0.8405909964936094
|
|
|
|
key: test_fscore
|
|
value: [0.86666667 0.85714286 0.84375 0.89230769 0.87096774 0.89552239
|
|
0.82539683 0.88888889 0.82539683 0.92307692]
|
|
|
|
mean value: 0.8689116808871864
|
|
|
|
key: train_fscore
|
|
value: [0.86818981 0.88536155 0.89122807 0.87873462 0.88 0.88811189
|
|
0.88966725 0.87719298 0.8869258 0.87915937]
|
|
|
|
mean value: 0.8824571336612159
|
|
|
|
key: test_precision
|
|
value: [0.89655172 0.84375 0.81818182 0.85294118 0.87096774 0.83333333
|
|
0.8125 0.875 0.8125 0.85714286]
|
|
|
|
mean value: 0.8472868651202012
|
|
|
|
key: train_precision
|
|
value: [0.84879725 0.86851211 0.86986301 0.85910653 0.85185185 0.86394558
|
|
0.8668942 0.85616438 0.87152778 0.85958904]
|
|
|
|
mean value: 0.8616251734964677
|
|
|
|
key: test_recall
|
|
value: [0.83870968 0.87096774 0.87096774 0.93548387 0.87096774 0.96774194
|
|
0.83870968 0.90322581 0.83870968 1. ]
|
|
|
|
mean value: 0.8935483870967742
|
|
|
|
key: train_recall
|
|
value: [0.88848921 0.9028777 0.91366906 0.89928058 0.91007194 0.91366906
|
|
0.91366906 0.89928058 0.9028777 0.89964158]
|
|
|
|
mean value: 0.9043526469147263
|
|
|
|
key: test_roc_auc
|
|
value: [0.82560484 0.77923387 0.74798387 0.81149194 0.81048387 0.79637097
|
|
0.73185484 0.81827957 0.71935484 0.84375 ]
|
|
|
|
mean value: 0.7884408602150538
|
|
|
|
key: train_roc_auc
|
|
value: [0.78931503 0.81763603 0.82303172 0.80527409 0.80010639 0.81598946
|
|
0.81951059 0.80278714 0.82206822 0.80545459]
|
|
|
|
mean value: 0.8101173261166756
|
|
|
|
key: test_jcc
|
|
value: [0.76470588 0.75 0.72972973 0.80555556 0.77142857 0.81081081
|
|
0.7027027 0.8 0.7027027 0.85714286]
|
|
|
|
mean value: 0.7694778812425871
|
|
|
|
key: train_jcc
|
|
value: [0.76708075 0.7943038 0.80379747 0.78369906 0.78571429 0.79874214
|
|
0.80126183 0.78125 0.7968254 0.784375 ]
|
|
|
|
mean value: 0.7897049721282987
|
|
|
|
MCC on Blind test: 0.22
|
|
|
|
Accuracy on Blind test: 0.5
|
|
|
|
Model_name: Passive Aggresive
|
|
Model func: PassiveAggressiveClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
PassiveAggressiveClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01426268 0.02361226 0.01946807 0.02516937 0.01924849 0.02545094
|
|
0.02460933 0.02311087 0.01817918 0.02519631]
|
|
|
|
mean value: 0.02183074951171875
|
|
|
|
key: score_time
|
|
value: [0.00876379 0.01119065 0.01151466 0.01174283 0.0117209 0.01172209
|
|
0.01216221 0.01199412 0.01224375 0.01191258]
|
|
|
|
mean value: 0.011496758460998536
|
|
|
|
key: test_mcc
|
|
value: [0.86070252 0.71572581 0.76032282 0.95299692 0.81952077 0.71206211
|
|
0.8084425 0.69956858 0.43161973 0.80651412]
|
|
|
|
mean value: 0.7567475880486182
|
|
|
|
key: train_mcc
|
|
value: [0.82321411 0.9627116 0.89833067 0.93097611 0.8239525 0.95734993
|
|
0.89402196 0.84253494 0.86216499 0.94174218]
|
|
|
|
mean value: 0.8936998993308193
|
|
|
|
key: test_accuracy
|
|
value: [0.93617021 0.87234043 0.89361702 0.9787234 0.91489362 0.87234043
|
|
0.91489362 0.84782609 0.76086957 0.91304348]
|
|
|
|
mean value: 0.8904717853839038
|
|
|
|
key: train_accuracy
|
|
value: [0.91904762 0.98333333 0.9547619 0.96904762 0.91428571 0.98095238
|
|
0.95238095 0.9216152 0.93824228 0.97387173]
|
|
|
|
mean value: 0.9507538739961543
|
|
|
|
key: test_fscore
|
|
value: [0.95384615 0.90322581 0.92307692 0.98412698 0.93333333 0.90909091
|
|
0.9375 0.87719298 0.83076923 0.93548387]
|
|
|
|
mean value: 0.9187646194119029
|
|
|
|
key: train_fscore
|
|
value: [0.94237288 0.98743268 0.96625222 0.97657658 0.93207547 0.98566308
|
|
0.96503497 0.9373814 0.95470383 0.98059965]
|
|
|
|
mean value: 0.9628092756589914
|
|
|
|
key: test_precision
|
|
value: [0.91176471 0.90322581 0.88235294 0.96875 0.96551724 0.85714286
|
|
0.90909091 0.96153846 0.79411765 0.90625 ]
|
|
|
|
mean value: 0.9059750569720798
|
|
|
|
key: train_precision
|
|
value: [0.89102564 0.98566308 0.95438596 0.97833935 0.98015873 0.98214286
|
|
0.93877551 0.99196787 0.92567568 0.96527778]
|
|
|
|
mean value: 0.959341246100077
|
|
|
|
key: test_recall
|
|
value: [1. 0.90322581 0.96774194 1. 0.90322581 0.96774194
|
|
0.96774194 0.80645161 0.87096774 0.96666667]
|
|
|
|
mean value: 0.9353763440860215
|
|
|
|
key: train_recall
|
|
value: [1. 0.98920863 0.97841727 0.97482014 0.88848921 0.98920863
|
|
0.99280576 0.88848921 0.98561151 0.99641577]
|
|
|
|
mean value: 0.968346613032155
|
|
|
|
key: test_roc_auc
|
|
value: [0.90625 0.8578629 0.85887097 0.96875 0.9203629 0.82762097
|
|
0.89012097 0.86989247 0.70215054 0.88958333]
|
|
|
|
mean value: 0.8691465053763441
|
|
|
|
key: train_roc_auc
|
|
value: [0.88028169 0.98051981 0.94343399 0.96628331 0.92663897 0.97699868
|
|
0.9330226 0.9372516 0.91588268 0.96299662]
|
|
|
|
mean value: 0.942330993899117
|
|
|
|
key: test_jcc
|
|
value: [0.91176471 0.82352941 0.85714286 0.96875 0.875 0.83333333
|
|
0.88235294 0.78125 0.71052632 0.87878788]
|
|
|
|
mean value: 0.8522437443877072
|
|
|
|
key: train_jcc
|
|
value: [0.89102564 0.9751773 0.9347079 0.95422535 0.87279152 0.97173145
|
|
0.93243243 0.88214286 0.91333333 0.96193772]
|
|
|
|
mean value: 0.9289505509252404
|
|
|
|
MCC on Blind test: 0.16
|
|
|
|
Accuracy on Blind test: 0.29
|
|
|
|
Model_name: Stochastic GDescent
|
|
Model func: SGDClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SGDClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01881099 0.01925945 0.01931882 0.02046704 0.01870441 0.01791883
|
|
0.01955533 0.02114439 0.01979399 0.02047253]
|
|
|
|
mean value: 0.019544577598571776
|
|
|
|
key: score_time
|
|
value: [0.01178288 0.01173759 0.01165867 0.01172376 0.01175308 0.01169109
|
|
0.01165581 0.01170611 0.011729 0.01172876]
|
|
|
|
mean value: 0.011716675758361817
|
|
|
|
key: test_mcc
|
|
value: [0.90524194 0.8084425 0.56329266 0.90524194 0.90662544 0.66337469
|
|
0.68913865 0.85513419 0.69721252 0.81348922]
|
|
|
|
mean value: 0.7807193745773244
|
|
|
|
key: train_mcc
|
|
value: [0.95199661 0.92638558 0.75648156 0.93060457 0.92557595 0.8841752
|
|
0.89514141 0.94728132 0.87441314 0.89469123]
|
|
|
|
mean value: 0.898674658892361
|
|
|
|
key: test_accuracy
|
|
value: [0.95744681 0.91489362 0.80851064 0.95744681 0.95744681 0.85106383
|
|
0.85106383 0.93478261 0.86956522 0.91304348]
|
|
|
|
mean value: 0.9015263644773358
|
|
|
|
key: train_accuracy
|
|
value: [0.97857143 0.96666667 0.88809524 0.96904762 0.96666667 0.94761905
|
|
0.95238095 0.97624703 0.94299287 0.95249406]
|
|
|
|
mean value: 0.9540781585793462
|
|
|
|
key: test_fscore
|
|
value: [0.96774194 0.9375 0.86956522 0.96774194 0.96875 0.89552239
|
|
0.88135593 0.95081967 0.90909091 0.9375 ]
|
|
|
|
mean value: 0.9285587989844194
|
|
|
|
key: train_fscore
|
|
value: [0.98389982 0.9754386 0.92205638 0.97674419 0.97526502 0.96180556
|
|
0.96363636 0.98194946 0.95847751 0.96527778]
|
|
|
|
mean value: 0.966455067016163
|
|
|
|
key: test_precision
|
|
value: [0.96774194 0.90909091 0.78947368 0.96774194 0.93939394 0.83333333
|
|
0.92857143 0.96666667 0.85714286 0.88235294]
|
|
|
|
mean value: 0.9041509630553873
|
|
|
|
key: train_precision
|
|
value: [0.97864769 0.95205479 0.85538462 0.97153025 0.95833333 0.9295302
|
|
0.97426471 0.98550725 0.92333333 0.93602694]
|
|
|
|
mean value: 0.9464613102143273
|
|
|
|
key: test_recall
|
|
value: [0.96774194 0.96774194 0.96774194 0.96774194 1. 0.96774194
|
|
0.83870968 0.93548387 0.96774194 1. ]
|
|
|
|
mean value: 0.9580645161290323
|
|
|
|
key: train_recall
|
|
value: [0.98920863 1. 1. 0.98201439 0.99280576 0.99640288
|
|
0.95323741 0.97841727 0.99640288 0.99641577]
|
|
|
|
mean value: 0.9884904979242413
|
|
|
|
key: test_roc_auc
|
|
value: [0.95262097 0.89012097 0.73387097 0.95262097 0.9375 0.79637097
|
|
0.85685484 0.9344086 0.8172043 0.875 ]
|
|
|
|
mean value: 0.8746572580645161
|
|
|
|
key: train_roc_auc
|
|
value: [0.97347756 0.95070423 0.83450704 0.96283818 0.95414936 0.92425778
|
|
0.95197082 0.97522262 0.91778186 0.93130648]
|
|
|
|
mean value: 0.9376215909300119
|
|
|
|
key: test_jcc
|
|
value: [0.9375 0.88235294 0.76923077 0.9375 0.93939394 0.81081081
|
|
0.78787879 0.90625 0.83333333 0.88235294]
|
|
|
|
mean value: 0.8686603523000582
|
|
|
|
key: train_jcc
|
|
value: [0.96830986 0.95205479 0.85538462 0.95454545 0.95172414 0.9264214
|
|
0.92982456 0.96453901 0.92026578 0.93288591]
|
|
|
|
mean value: 0.9355955521485729
|
|
|
|
MCC on Blind test: 0.15
|
|
|
|
Accuracy on Blind test: 0.34
|
|
|
|
Model_name: AdaBoost Classifier
|
|
Model func: AdaBoostClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', AdaBoostClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.17678094 0.16169643 0.16195178 0.16259241 0.16251469 0.16290808
|
|
0.16300702 0.1635623 0.16283536 0.16640735]
|
|
|
|
mean value: 0.16442563533782958
|
|
|
|
key: score_time
|
|
value: [0.01511002 0.01516867 0.01555228 0.01531434 0.01529408 0.01527691
|
|
0.01535225 0.01529121 0.01536965 0.01541305]
|
|
|
|
mean value: 0.015314245223999023
|
|
|
|
key: test_mcc
|
|
value: [0.95299692 0.8566725 1. 1. 0.90662544 0.81503725
|
|
0.91188882 0.95250095 0.95087679 0.85513419]
|
|
|
|
mean value: 0.9201732856062123
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.9787234 0.93617021 1. 1. 0.95744681 0.91489362
|
|
0.95744681 0.97826087 0.97826087 0.93478261]
|
|
|
|
mean value: 0.9635985198889917
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.98412698 0.95238095 1. 1. 0.96875 0.93939394
|
|
0.96666667 0.98360656 0.98412698 0.95081967]
|
|
|
|
mean value: 0.9729871756203723
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.96875 0.9375 1. 1. 0.93939394 0.88571429
|
|
1. 1. 0.96875 0.93548387]
|
|
|
|
mean value: 0.9635592096075967
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 0.96774194 1. 1. 1. 1.
|
|
0.93548387 0.96774194 1. 0.96666667]
|
|
|
|
mean value: 0.983763440860215
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.96875 0.92137097 1. 1. 0.9375 0.875
|
|
0.96774194 0.98387097 0.96666667 0.92083333]
|
|
|
|
mean value: 0.9541733870967742
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.96875 0.90909091 1. 1. 0.93939394 0.88571429
|
|
0.93548387 0.96774194 0.96875 0.90625 ]
|
|
|
|
mean value: 0.9481174940650747
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.08
|
|
|
|
Accuracy on Blind test: 0.2
|
|
|
|
Model_name: Bagging Classifier
|
|
Model func: BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
BaggingClassifier(n_jobs=10, oob_score=True,
|
|
random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.06447458 0.06540036 0.06229663 0.08273578 0.07696605 0.0617063
|
|
0.0600605 0.06062365 0.06386662 0.07482314]
|
|
|
|
mean value: 0.06729536056518555
|
|
|
|
key: score_time
|
|
value: [0.03304315 0.03502083 0.03025556 0.03909683 0.02619481 0.04146361
|
|
0.02470899 0.02245951 0.03020692 0.02624965]
|
|
|
|
mean value: 0.03086998462677002
|
|
|
|
key: test_mcc
|
|
value: [0.95299692 0.8566725 0.95299692 1. 0.90662544 0.81503725
|
|
0.87213027 0.95250095 0.95087679 0.77787176]
|
|
|
|
mean value: 0.9037708791663533
|
|
|
|
key: train_mcc
|
|
value: [0.97879832 1. 0.99468526 0.98945277 0.97870346 0.98938023
|
|
0.99470349 0.98950083 0.98940987 0.98946562]
|
|
|
|
mean value: 0.9894099843463138
|
|
|
|
key: test_accuracy
|
|
value: [0.9787234 0.93617021 0.9787234 1. 0.95744681 0.91489362
|
|
0.93617021 0.97826087 0.97826087 0.89130435]
|
|
|
|
mean value: 0.954995374653099
|
|
|
|
key: train_accuracy
|
|
value: [0.99047619 1. 0.99761905 0.9952381 0.99047619 0.9952381
|
|
0.99761905 0.99524941 0.99524941 0.99524941]
|
|
|
|
mean value: 0.9952414885193983
|
|
|
|
key: test_fscore
|
|
value: [0.98412698 0.95238095 0.98412698 1. 0.96875 0.93939394
|
|
0.94915254 0.98360656 0.98412698 0.9122807 ]
|
|
|
|
mean value: 0.965794564566016
|
|
|
|
key: train_fscore
|
|
value: [0.99285714 1. 0.99820467 0.99638989 0.99283154 0.99641577
|
|
0.9981982 0.99638989 0.99640288 0.99640288]
|
|
|
|
mean value: 0.9964092859536038
|
|
|
|
key: test_precision
|
|
value: [0.96875 0.9375 0.96875 1. 0.93939394 0.88571429
|
|
1. 1. 0.96875 0.96296296]
|
|
|
|
mean value: 0.9631821188071188
|
|
|
|
key: train_precision
|
|
value: [0.9858156 1. 0.99641577 1. 0.98928571 0.99285714
|
|
1. 1. 0.99640288 1. ]
|
|
|
|
mean value: 0.9960777108286898
|
|
|
|
key: test_recall
|
|
value: [1. 0.96774194 1. 1. 1. 1.
|
|
0.90322581 0.96774194 1. 0.86666667]
|
|
|
|
mean value: 0.9705376344086022
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 0.99280576 0.99640288 1.
|
|
0.99640288 0.99280576 0.99640288 0.99283154]
|
|
|
|
mean value: 0.996765168510353
|
|
|
|
key: test_roc_auc
|
|
value: [0.96875 0.92137097 0.96875 1. 0.9375 0.875
|
|
0.9516129 0.98387097 0.96666667 0.90208333]
|
|
|
|
mean value: 0.9475604838709677
|
|
|
|
key: train_roc_auc
|
|
value: [0.98591549 1. 0.99647887 0.99640288 0.98763806 0.99295775
|
|
0.99820144 0.99640288 0.99470494 0.99641577]
|
|
|
|
mean value: 0.9945118071449628
|
|
|
|
key: test_jcc
|
|
value: [0.96875 0.90909091 0.96875 1. 0.93939394 0.88571429
|
|
0.90322581 0.96774194 0.96875 0.83870968]
|
|
|
|
mean value: 0.9350126553553972
|
|
|
|
key: train_jcc
|
|
value: [0.9858156 1. 0.99641577 0.99280576 0.98576512 0.99285714
|
|
0.99640288 0.99280576 0.99283154 0.99283154]
|
|
|
|
mean value: 0.9928531111784986
|
|
|
|
MCC on Blind test: 0.05
|
|
|
|
Accuracy on Blind test: 0.19
|
|
|
|
Model_name: Gaussian Process
|
|
Model func: GaussianProcessClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianProcessClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.17185569 0.10447001 0.11902738 0.11162257 0.14436507 0.15038991
|
|
0.14318299 0.14804506 0.16088104 0.14451122]
|
|
|
|
mean value: 0.13983509540557862
|
|
|
|
key: score_time
|
|
value: [0.02366495 0.01449943 0.01455712 0.02399635 0.02356625 0.02375555
|
|
0.02349377 0.02348185 0.02342916 0.02345991]
|
|
|
|
mean value: 0.021790432929992675
|
|
|
|
key: test_mcc
|
|
value: [0.6139232 0.44917734 0.56329266 0.60908698 0.71206211 0.39449818
|
|
0.34522561 0.53722882 0.19552949 0.55533018]
|
|
|
|
mean value: 0.4975354557722252
|
|
|
|
key: train_mcc
|
|
value: [0.95773996 0.93680867 0.94725945 0.94203047 0.94203047 0.96825224
|
|
0.93680867 0.9527212 0.9527212 0.95778798]
|
|
|
|
mean value: 0.9494160325679993
|
|
|
|
key: test_accuracy
|
|
value: [0.82978723 0.76595745 0.80851064 0.82978723 0.87234043 0.74468085
|
|
0.72340426 0.80434783 0.67391304 0.80434783]
|
|
|
|
mean value: 0.7857076780758557
|
|
|
|
key: train_accuracy
|
|
value: [0.98095238 0.97142857 0.97619048 0.97380952 0.97380952 0.98571429
|
|
0.97142857 0.97862233 0.97862233 0.98099762]
|
|
|
|
mean value: 0.9771575613618368
|
|
|
|
key: test_fscore
|
|
value: [0.88235294 0.84057971 0.86956522 0.87878788 0.90909091 0.82352941
|
|
0.80597015 0.86153846 0.7761194 0.85714286]
|
|
|
|
mean value: 0.8504676939276321
|
|
|
|
key: train_fscore
|
|
value: [0.9858156 0.97887324 0.98233216 0.98059965 0.98059965 0.98932384
|
|
0.97887324 0.9840708 0.9840708 0.98586572]
|
|
|
|
mean value: 0.9830424692438128
|
|
|
|
key: test_precision
|
|
value: [0.81081081 0.76315789 0.78947368 0.82857143 0.85714286 0.75675676
|
|
0.75 0.82352941 0.72222222 0.81818182]
|
|
|
|
mean value: 0.7919846884397969
|
|
|
|
key: train_precision
|
|
value: [0.97202797 0.95862069 0.96527778 0.96193772 0.96193772 0.97887324
|
|
0.95862069 0.96864111 0.96864111 0.97212544]
|
|
|
|
mean value: 0.9666703466583892
|
|
|
|
key: test_recall
|
|
value: [0.96774194 0.93548387 0.96774194 0.93548387 0.96774194 0.90322581
|
|
0.87096774 0.90322581 0.83870968 0.9 ]
|
|
|
|
mean value: 0.9190322580645162
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.76512097 0.68649194 0.73387097 0.78024194 0.82762097 0.6703629
|
|
0.65423387 0.7516129 0.58602151 0.7625 ]
|
|
|
|
mean value: 0.7218077956989247
|
|
|
|
key: train_roc_auc
|
|
value: [0.97183099 0.95774648 0.96478873 0.96126761 0.96126761 0.97887324
|
|
0.95774648 0.96853147 0.96853147 0.97183099]
|
|
|
|
mean value: 0.9662415049738993
|
|
|
|
key: test_jcc
|
|
value: [0.78947368 0.725 0.76923077 0.78378378 0.83333333 0.7
|
|
0.675 0.75675676 0.63414634 0.75 ]
|
|
|
|
mean value: 0.7416724668778584
|
|
|
|
key: train_jcc
|
|
value: [0.97202797 0.95862069 0.96527778 0.96193772 0.96193772 0.97887324
|
|
0.95862069 0.96864111 0.96864111 0.97212544]
|
|
|
|
mean value: 0.9666703466583892
|
|
|
|
MCC on Blind test: 0.15
|
|
|
|
Accuracy on Blind test: 0.35
|
|
|
|
Model_name: Gradient Boosting
|
|
Model func: GradientBoostingClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GradientBoostingClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.63199282 0.64057088 0.6330502 0.62381983 0.62570333 0.63321471
|
|
0.6462667 0.64404368 0.64453101 0.61528659]
|
|
|
|
mean value: 0.633847975730896
|
|
|
|
key: score_time
|
|
value: [0.00964928 0.01027107 0.00939989 0.01029348 0.01047158 0.00946736
|
|
0.01022267 0.010427 0.00941825 0.00946999]
|
|
|
|
mean value: 0.0099090576171875
|
|
|
|
key: test_mcc
|
|
value: [0.95299692 0.8566725 1. 0.95436677 0.90662544 0.81503725
|
|
1. 0.95250095 0.95087679 0.7125 ]
|
|
|
|
mean value: 0.9101576622344475
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.9787234 0.93617021 1. 0.9787234 0.95744681 0.91489362
|
|
1. 0.97826087 0.97826087 0.86956522]
|
|
|
|
mean value: 0.959204440333025
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.98412698 0.95238095 1. 0.98360656 0.96875 0.93939394
|
|
1. 0.98360656 0.98412698 0.9 ]
|
|
|
|
mean value: 0.9695991974782958
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.96875 0.9375 1. 1. 0.93939394 0.88571429
|
|
1. 1. 0.96875 0.9 ]
|
|
|
|
mean value: 0.9600108225108225
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 0.96774194 1. 0.96774194 1. 1.
|
|
1. 0.96774194 1. 0.9 ]
|
|
|
|
mean value: 0.9803225806451613
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.96875 0.92137097 1. 0.98387097 0.9375 0.875
|
|
1. 0.98387097 0.96666667 0.85625 ]
|
|
|
|
mean value: 0.9493279569892473
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.96875 0.90909091 1. 0.96774194 0.93939394 0.88571429
|
|
1. 0.96774194 0.96875 0.81818182]
|
|
|
|
mean value: 0.9425364823348694
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.08
|
|
|
|
Accuracy on Blind test: 0.2
|
|
|
|
Model_name: QDA
|
|
Model func: QuadraticDiscriminantAnalysis()
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', QuadraticDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.03014874 0.03859639 0.02921605 0.02919149 0.02927637 0.02877808
|
|
0.02914238 0.0289011 0.02861476 0.03231573]
|
|
|
|
mean value: 0.03041810989379883
|
|
|
|
key: score_time
|
|
value: [0.01298356 0.02056313 0.01420951 0.01520872 0.01517725 0.015167
|
|
0.01596665 0.01756454 0.0154233 0.01316094]
|
|
|
|
mean value: 0.01554245948791504
|
|
|
|
key: test_mcc
|
|
value: [-0.00390816 -0.18759162 -0.10225003 0.33463647 0.42453805 -0.18759162
|
|
-0.00572561 -0.03648678 -0.05008953 0.0382546 ]
|
|
|
|
mean value: 0.022378577955517495
|
|
|
|
key: train_mcc
|
|
value: [0.2960748 0.28737578 0.2960748 0.28737578 0.26927519 0.32099733
|
|
0.32896374 0.30312249 0.29467148 0.28753566]
|
|
|
|
mean value: 0.2971467060046902
|
|
|
|
key: test_accuracy
|
|
value: [0.63829787 0.59574468 0.59574468 0.72340426 0.74468085 0.59574468
|
|
0.61702128 0.60869565 0.63043478 0.63043478]
|
|
|
|
mean value: 0.6380203515263645
|
|
|
|
key: train_accuracy
|
|
value: [0.7047619 0.70238095 0.7047619 0.70238095 0.69761905 0.71190476
|
|
0.71428571 0.70546318 0.70308789 0.70308789]
|
|
|
|
mean value: 0.7049734192964596
|
|
|
|
key: test_fscore
|
|
value: [0.77333333 0.74666667 0.73972603 0.82191781 0.83783784 0.74666667
|
|
0.75 0.74285714 0.76712329 0.76056338]
|
|
|
|
mean value: 0.7686692150931008
|
|
|
|
key: train_fscore
|
|
value: [0.81764706 0.8164464 0.81764706 0.8164464 0.81405564 0.82127031
|
|
0.82248521 0.81764706 0.8164464 0.81698389]
|
|
|
|
mean value: 0.8177075432290432
|
|
|
|
key: test_precision
|
|
value: [0.65909091 0.63636364 0.64285714 0.71428571 0.72093023 0.63636364
|
|
0.65853659 0.66666667 0.66666667 0.65853659]
|
|
|
|
mean value: 0.6660297775584219
|
|
|
|
key: train_precision
|
|
value: [0.69154229 0.6898263 0.69154229 0.6898263 0.68641975 0.69674185
|
|
0.69849246 0.69154229 0.6898263 0.69059406]
|
|
|
|
mean value: 0.6916353903300737
|
|
|
|
key: test_recall
|
|
value: [0.93548387 0.90322581 0.87096774 0.96774194 1. 0.90322581
|
|
0.87096774 0.83870968 0.90322581 0.9 ]
|
|
|
|
mean value: 0.9093548387096774
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.49899194 0.4516129 0.46673387 0.60887097 0.625 0.4516129
|
|
0.49798387 0.48602151 0.48494624 0.5125 ]
|
|
|
|
mean value: 0.5084274193548387
|
|
|
|
key: train_roc_auc
|
|
value: [0.56338028 0.55985915 0.56338028 0.55985915 0.5528169 0.57394366
|
|
0.57746479 0.56643357 0.56293706 0.55985915]
|
|
|
|
mean value: 0.563993400965232
|
|
|
|
key: test_jcc
|
|
value: [0.63043478 0.59574468 0.58695652 0.69767442 0.72093023 0.59574468
|
|
0.6 0.59090909 0.62222222 0.61363636]
|
|
|
|
mean value: 0.6254252993980421
|
|
|
|
key: train_jcc
|
|
value: [0.69154229 0.6898263 0.69154229 0.6898263 0.68641975 0.69674185
|
|
0.69849246 0.69154229 0.6898263 0.69059406]
|
|
|
|
mean value: 0.6916353903300737
|
|
|
|
MCC on Blind test: 0.04
|
|
|
|
Accuracy on Blind test: 0.15
|
|
|
|
Model_name: Ridge Classifier
|
|
Model func: RidgeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02983284 0.03181815 0.03235269 0.03182912 0.03201151 0.031914
|
|
0.03235412 0.03964448 0.04482198 0.0336349 ]
|
|
|
|
mean value: 0.03402137756347656
|
|
|
|
key: score_time
|
|
value: [0.02541828 0.02288342 0.02519679 0.02281809 0.02517581 0.02304912
|
|
0.02497959 0.025208 0.02368593 0.02489567]
|
|
|
|
mean value: 0.024331068992614745
|
|
|
|
key: test_mcc
|
|
value: [0.90662544 0.8084425 0.90662544 0.95299692 0.90662544 0.6139232
|
|
0.81048387 0.80215054 0.59332241 0.7073172 ]
|
|
|
|
mean value: 0.8008512976244041
|
|
|
|
key: train_mcc
|
|
value: [0.93614376 0.93614376 0.91503448 0.93085643 0.93085643 0.93085643
|
|
0.92030205 0.93149626 0.92041993 0.94174218]
|
|
|
|
mean value: 0.9293851718860523
|
|
|
|
key: test_accuracy
|
|
value: [0.95744681 0.91489362 0.95744681 0.9787234 0.95744681 0.82978723
|
|
0.91489362 0.91304348 0.82608696 0.86956522]
|
|
|
|
mean value: 0.9119333950046253
|
|
|
|
key: train_accuracy
|
|
value: [0.97142857 0.97142857 0.96190476 0.96904762 0.96904762 0.96904762
|
|
0.96428571 0.96912114 0.96437055 0.97387173]
|
|
|
|
mean value: 0.968355389661803
|
|
|
|
key: test_fscore
|
|
value: [0.96875 0.9375 0.96875 0.98412698 0.96875 0.88235294
|
|
0.93548387 0.93548387 0.875 0.90322581]
|
|
|
|
mean value: 0.9359423473690551
|
|
|
|
key: train_fscore
|
|
value: [0.9787234 0.9787234 0.97183099 0.97699115 0.97699115 0.97699115
|
|
0.97354497 0.97707231 0.97345133 0.98059965]
|
|
|
|
mean value: 0.9764919504404125
|
|
|
|
key: test_precision
|
|
value: [0.93939394 0.90909091 0.93939394 0.96875 0.93939394 0.81081081
|
|
0.93548387 0.93548387 0.84848485 0.875 ]
|
|
|
|
mean value: 0.910128612850387
|
|
|
|
key: train_precision
|
|
value: [0.96503497 0.96503497 0.95172414 0.96167247 0.96167247 0.96167247
|
|
0.9550173 0.95847751 0.95818815 0.96527778]
|
|
|
|
mean value: 0.9603772230380215
|
|
|
|
key: test_recall
|
|
value: [1. 0.96774194 1. 1. 1. 0.96774194
|
|
0.93548387 0.93548387 0.90322581 0.93333333]
|
|
|
|
mean value: 0.9643010752688173
|
|
|
|
key: train_recall
|
|
value: [0.99280576 0.99280576 0.99280576 0.99280576 0.99280576 0.99280576
|
|
0.99280576 0.99640288 0.98920863 0.99641577]
|
|
|
|
mean value: 0.993166756917047
|
|
|
|
key: test_roc_auc
|
|
value: [0.9375 0.89012097 0.9375 0.96875 0.9375 0.76512097
|
|
0.90524194 0.90107527 0.78494624 0.84166667]
|
|
|
|
mean value: 0.8869422043010753
|
|
|
|
key: train_roc_auc
|
|
value: [0.96119161 0.96119161 0.9471071 0.95767048 0.95767048 0.95767048
|
|
0.95062823 0.9562434 0.95264627 0.96299662]
|
|
|
|
mean value: 0.9565016292218447
|
|
|
|
key: test_jcc
|
|
value: [0.93939394 0.88235294 0.93939394 0.96875 0.93939394 0.78947368
|
|
0.87878788 0.87878788 0.77777778 0.82352941]
|
|
|
|
mean value: 0.8817641390687057
|
|
|
|
key: train_jcc
|
|
value: [0.95833333 0.95833333 0.94520548 0.9550173 0.9550173 0.9550173
|
|
0.94845361 0.95517241 0.94827586 0.96193772]
|
|
|
|
mean value: 0.9540763649605376
|
|
|
|
MCC on Blind test: 0.2
|
|
|
|
Accuracy on Blind test: 0.43
|
|
|
|
Model_name: Ridge ClassifierCV
|
|
Model func: RidgeClassifierCV(cv=10)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifierCV(cv=10))])
|
|
|
|
key: fit_time
|
|
value: [0.27491093 0.27264166 0.27324533 0.27130818 0.28760481 0.28644633
|
|
0.33207846 0.28306985 0.2727232 0.27460504]
|
|
|
|
mean value: 0.2828633785247803
|
|
|
|
key: score_time
|
|
value: [0.02479482 0.02375317 0.02303386 0.02578378 0.0250814 0.02236199
|
|
0.02544141 0.02499795 0.02385545 0.02501225]
|
|
|
|
mean value: 0.024411606788635253
|
|
|
|
key: test_mcc
|
|
value: [0.86091836 0.71025956 0.95299692 0.95436677 0.90662544 0.81503725
|
|
0.8566725 0.81245565 0.74844698 0.75776742]
|
|
|
|
mean value: 0.8375546868374703
|
|
|
|
key: train_mcc
|
|
value: [0.95204958 0.95204958 0.94674008 0.94131391 0.94131391 0.95204958
|
|
0.94674008 0.95769694 0.9469923 0.96812026]
|
|
|
|
mean value: 0.9505066231143429
|
|
|
|
key: test_accuracy
|
|
value: [0.93617021 0.87234043 0.9787234 0.9787234 0.95744681 0.91489362
|
|
0.93617021 0.91304348 0.89130435 0.89130435]
|
|
|
|
mean value: 0.9270120259019426
|
|
|
|
key: train_accuracy
|
|
value: [0.97857143 0.97857143 0.97619048 0.97380952 0.97380952 0.97857143
|
|
0.97619048 0.98099762 0.97624703 0.98574822]
|
|
|
|
mean value: 0.977870715982355
|
|
|
|
key: test_fscore
|
|
value: [0.95081967 0.90625 0.98412698 0.98360656 0.96875 0.93939394
|
|
0.95238095 0.93333333 0.92307692 0.91803279]
|
|
|
|
mean value: 0.9459771148705575
|
|
|
|
key: train_fscore
|
|
value: /home/tanu/git/LSHTM_analysis/scripts/ml/./katg_orig.py:114: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
baseline_CT.sort_values(by = ['test_mcc'], ascending = False, inplace = True)
|
|
/home/tanu/git/LSHTM_analysis/scripts/ml/./katg_orig.py:117: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
baseline_BT.sort_values(by = ['bts_mcc'], ascending = False, inplace = True)
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[0.98395722 0.98395722 0.98220641 0.98039216 0.98039216 0.98395722
|
|
0.98220641 0.98576512 0.98220641 0.98932384]
|
|
|
|
mean value: 0.9834364156532882
|
|
|
|
key: test_precision
|
|
value: [0.96666667 0.87878788 0.96875 1. 0.93939394 0.88571429
|
|
0.9375 0.96551724 0.88235294 0.90322581]
|
|
|
|
mean value: 0.9327908759570165
|
|
|
|
key: train_precision
|
|
value: [0.97526502 0.97526502 0.97183099 0.97173145 0.97173145 0.97526502
|
|
0.97183099 0.97535211 0.97183099 0.98233216]
|
|
|
|
mean value: 0.9742435176429602
|
|
|
|
key: test_recall
|
|
value: [0.93548387 0.93548387 1. 0.96774194 1. 1.
|
|
0.96774194 0.90322581 0.96774194 0.93333333]
|
|
|
|
mean value: 0.9610752688172043
|
|
|
|
key: train_recall
|
|
value: [0.99280576 0.99280576 0.99280576 0.98920863 0.98920863 0.99280576
|
|
0.99280576 0.99640288 0.99280576 0.99641577]
|
|
|
|
mean value: 0.9928070446868312
|
|
|
|
key: test_roc_auc
|
|
value: [0.93649194 0.84274194 0.96875 0.98387097 0.9375 0.875
|
|
0.92137097 0.91827957 0.85053763 0.87291667]
|
|
|
|
mean value: 0.9107459677419355
|
|
|
|
key: train_roc_auc
|
|
value: [0.97175499 0.97175499 0.96823386 0.9664353 0.9664353 0.97175499
|
|
0.96823386 0.97372591 0.96843085 0.98060225]
|
|
|
|
mean value: 0.9707362318873928
|
|
|
|
key: test_jcc
|
|
value: [0.90625 0.82857143 0.96875 0.96774194 0.93939394 0.88571429
|
|
0.90909091 0.875 0.85714286 0.84848485]
|
|
|
|
mean value: 0.8986140203882139
|
|
|
|
key: train_jcc
|
|
value: [0.96842105 0.96842105 0.96503497 0.96153846 0.96153846 0.96842105
|
|
0.96503497 0.97192982 0.96503497 0.97887324]
|
|
|
|
mean value: 0.9674248040074578
|
|
|
|
MCC on Blind test: 0.16
|
|
|
|
Accuracy on Blind test: 0.41
|
|
|
|
Model_name: Logistic Regression
|
|
Model func: LogisticRegression(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegression(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.03594327 0.03823495 0.03745127 0.03787708 0.03765678 0.03796601
|
|
0.03775644 0.04696417 0.07437563 0.07606626]
|
|
|
|
mean value: 0.046029186248779295
|
|
|
|
key: score_time
|
|
value: [0.01225424 0.01527619 0.01508021 0.01502061 0.01554465 0.01481938
|
|
0.01508856 0.02117872 0.01212502 0.01207471]
|
|
|
|
mean value: 0.014846229553222656
|
|
|
|
key: test_mcc
|
|
value: [0.96824584 0.74348441 0.80813523 0.84266484 0.87096774 0.87096774
|
|
0.77459667 0.84266484 0.70537634 0.80516731]
|
|
|
|
mean value: 0.8232270968732767
|
|
|
|
key: train_mcc
|
|
value: [0.88143754 0.8705036 0.87070641 0.86695696 0.85646981 0.85265591
|
|
0.87070641 0.86366703 0.86042111 0.86714973]
|
|
|
|
mean value: 0.8660674512400568
|
|
|
|
key: test_accuracy
|
|
value: [0.98387097 0.87096774 0.90322581 0.91935484 0.93548387 0.93548387
|
|
0.88709677 0.91935484 0.85245902 0.90163934]
|
|
|
|
mean value: 0.9108937070333156
|
|
|
|
key: train_accuracy
|
|
value: [0.94064748 0.9352518 0.9352518 0.93345324 0.92805755 0.92625899
|
|
0.9352518 0.93165468 0.92998205 0.93357271]
|
|
|
|
mean value: 0.9329382095759657
|
|
|
|
key: test_fscore
|
|
value: [0.98412698 0.875 0.90625 0.92307692 0.93548387 0.93548387
|
|
0.8852459 0.91525424 0.85245902 0.90322581]
|
|
|
|
mean value: 0.9115606610911926
|
|
|
|
key: train_fscore
|
|
value: [0.94117647 0.9352518 0.93594306 0.93381038 0.92907801 0.92691622
|
|
0.93594306 0.93262411 0.93097345 0.93381038]
|
|
|
|
mean value: 0.9335526941508385
|
|
|
|
key: test_precision
|
|
value: [0.96875 0.84848485 0.87878788 0.88235294 0.93548387 0.93548387
|
|
0.9 0.96428571 0.86666667 0.875 ]
|
|
|
|
mean value: 0.9055295791337062
|
|
|
|
key: train_precision
|
|
value: [0.93286219 0.9352518 0.92605634 0.92882562 0.91608392 0.91872792
|
|
0.92605634 0.91958042 0.91637631 0.93214286]
|
|
|
|
mean value: 0.9251963702827759
|
|
|
|
key: test_recall
|
|
value: [1. 0.90322581 0.93548387 0.96774194 0.93548387 0.93548387
|
|
0.87096774 0.87096774 0.83870968 0.93333333]
|
|
|
|
mean value: 0.9191397849462366
|
|
|
|
key: train_recall
|
|
value: [0.94964029 0.9352518 0.94604317 0.93884892 0.94244604 0.9352518
|
|
0.94604317 0.94604317 0.94604317 0.93548387]
|
|
|
|
mean value: 0.9421095381759109
|
|
|
|
key: test_roc_auc
|
|
value: [0.98387097 0.87096774 0.90322581 0.91935484 0.93548387 0.93548387
|
|
0.88709677 0.91935484 0.85268817 0.90215054]
|
|
|
|
mean value: 0.9109677419354839
|
|
|
|
key: train_roc_auc
|
|
value: [0.94064748 0.9352518 0.9352518 0.93345324 0.92805755 0.92625899
|
|
0.9352518 0.93165468 0.93001083 0.93356927]
|
|
|
|
mean value: 0.9329407441788504
|
|
|
|
key: test_jcc
|
|
value: [0.96875 0.77777778 0.82857143 0.85714286 0.87878788 0.87878788
|
|
0.79411765 0.84375 0.74285714 0.82352941]
|
|
|
|
mean value: 0.8394072022748493
|
|
|
|
key: train_jcc
|
|
value: [0.88888889 0.87837838 0.87959866 0.87583893 0.86754967 0.86378738
|
|
0.87959866 0.87375415 0.87086093 0.87583893]
|
|
|
|
mean value: 0.8754094568296669
|
|
|
|
MCC on Blind test: 0.2
|
|
|
|
Accuracy on Blind test: 0.51
|
|
|
|
Model_name: Logistic RegressionCV
|
|
Model func: LogisticRegressionCV(random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegressionCV(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.91726518 0.8967886 1.0972681 0.90807962 1.03316021 0.91765285
|
|
1.06892157 0.88056183 0.8810854 0.9651711 ]
|
|
|
|
mean value: 0.9565954446792603
|
|
|
|
key: score_time
|
|
value: [0.01491475 0.01541138 0.0154171 0.0152595 0.01555252 0.01543021
|
|
0.01531744 0.01531911 0.02811074 0.01530552]
|
|
|
|
mean value: 0.016603827476501465
|
|
|
|
key: test_mcc
|
|
value: [0.87096774 0.87278605 0.96824584 0.93743687 0.90369611 0.90369611
|
|
0.93743687 0.90748521 0.77096774 0.8688172 ]
|
|
|
|
mean value: 0.8941535747226808
|
|
|
|
key: train_mcc
|
|
value: [0.98561151 0.97841727 1. 0.98202074 1. 0.97841727
|
|
0.97124816 0.99640932 0.97845594 1. ]
|
|
|
|
mean value: 0.9870580210483318
|
|
|
|
key: test_accuracy
|
|
value: [0.93548387 0.93548387 0.98387097 0.96774194 0.9516129 0.9516129
|
|
0.96774194 0.9516129 0.8852459 0.93442623]
|
|
|
|
mean value: 0.9464833421470121
|
|
|
|
key: train_accuracy
|
|
value: [0.99280576 0.98920863 1. 0.99100719 1. 0.98920863
|
|
0.98561151 0.99820144 0.98922801 1. ]
|
|
|
|
mean value: 0.9935271172648954
|
|
|
|
key: test_fscore
|
|
value: [0.93548387 0.9375 0.98360656 0.96875 0.95081967 0.95238095
|
|
0.96666667 0.94915254 0.8852459 0.93333333]
|
|
|
|
mean value: 0.9462939496869116
|
|
|
|
key: train_fscore
|
|
value: [0.99280576 0.98920863 1. 0.99099099 1. 0.98920863
|
|
0.98555957 0.9981982 0.98920863 1. ]
|
|
|
|
mean value: 0.9935180410652452
|
|
|
|
key: test_precision
|
|
value: [0.93548387 0.90909091 1. 0.93939394 0.96666667 0.9375
|
|
1. 1. 0.9 0.93333333]
|
|
|
|
mean value: 0.952146871945259
|
|
|
|
key: train_precision
|
|
value: [0.99280576 0.98920863 1. 0.99277978 1. 0.98920863
|
|
0.98913043 1. 0.98920863 1. ]
|
|
|
|
mean value: 0.9942341872852369
|
|
|
|
key: test_recall
|
|
value: [0.93548387 0.96774194 0.96774194 1. 0.93548387 0.96774194
|
|
0.93548387 0.90322581 0.87096774 0.93333333]
|
|
|
|
mean value: 0.9417204301075268
|
|
|
|
key: train_recall
|
|
value: [0.99280576 0.98920863 1. 0.98920863 1. 0.98920863
|
|
0.98201439 0.99640288 0.98920863 1. ]
|
|
|
|
mean value: 0.9928057553956835
|
|
|
|
key: test_roc_auc
|
|
value: [0.93548387 0.93548387 0.98387097 0.96774194 0.9516129 0.9516129
|
|
0.96774194 0.9516129 0.88548387 0.9344086 ]
|
|
|
|
mean value: 0.946505376344086
|
|
|
|
key: train_roc_auc
|
|
value: [0.99280576 0.98920863 1. 0.99100719 1. 0.98920863
|
|
0.98561151 0.99820144 0.98922797 1. ]
|
|
|
|
mean value: 0.9935271137928368
|
|
|
|
key: test_jcc
|
|
value: [0.87878788 0.88235294 0.96774194 0.93939394 0.90625 0.90909091
|
|
0.93548387 0.90322581 0.79411765 0.875 ]
|
|
|
|
mean value: 0.8991444928411247
|
|
|
|
key: train_jcc
|
|
value: [0.98571429 0.97864769 1. 0.98214286 1. 0.97864769
|
|
0.97153025 0.99640288 0.97864769 1. ]
|
|
|
|
mean value: 0.9871733330163526
|
|
|
|
MCC on Blind test: 0.13
|
|
|
|
Accuracy on Blind test: 0.43
|
|
|
|
Model_name: Gaussian NB
|
|
Model func: GaussianNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01596975 0.01088405 0.01051188 0.01025772 0.01047158 0.01051545
|
|
0.01049328 0.07161856 0.01046991 0.01163387]
|
|
|
|
mean value: 0.017282605171203613
|
|
|
|
key: score_time
|
|
value: [0.01237273 0.00940228 0.00911212 0.00896454 0.00897431 0.00900674
|
|
0.0089376 0.00923395 0.00951004 0.00992632]
|
|
|
|
mean value: 0.009544062614440917
|
|
|
|
key: test_mcc
|
|
value: [0.51639778 0.54953196 0.64820372 0.59603956 0.55301004 0.75623534
|
|
0.5483871 0.64820372 0.52020635 0.64178842]
|
|
|
|
mean value: 0.5978003999385197
|
|
|
|
key: train_mcc
|
|
value: [0.58619138 0.63442478 0.64790132 0.63414469 0.61309946 0.62982654
|
|
0.61973231 0.60119024 0.60212461 0.64171147]
|
|
|
|
mean value: 0.6210346806411129
|
|
|
|
key: test_accuracy
|
|
value: [0.75806452 0.77419355 0.82258065 0.79032258 0.77419355 0.87096774
|
|
0.77419355 0.82258065 0.75409836 0.81967213]
|
|
|
|
mean value: 0.7960867265996827
|
|
|
|
key: train_accuracy
|
|
value: [0.78956835 0.81654676 0.82374101 0.81654676 0.8057554 0.8147482
|
|
0.80935252 0.80035971 0.80071813 0.82046679]
|
|
|
|
mean value: 0.8097803624246025
|
|
|
|
key: test_fscore
|
|
value: [0.75409836 0.76666667 0.83076923 0.8115942 0.78787879 0.88235294
|
|
0.77419355 0.81355932 0.7826087 0.80701754]
|
|
|
|
mean value: 0.8010739299978262
|
|
|
|
key: train_fscore
|
|
value: [0.80467446 0.82229965 0.82685512 0.82167832 0.8125 0.81769912
|
|
0.81468531 0.8042328 0.80492091 0.82517483]
|
|
|
|
mean value: 0.8154720527371425
|
|
|
|
key: test_precision
|
|
value: [0.76666667 0.79310345 0.79411765 0.73684211 0.74285714 0.81081081
|
|
0.77419355 0.85714286 0.71052632 0.85185185]
|
|
|
|
mean value: 0.7838112394103743
|
|
|
|
key: train_precision
|
|
value: [0.75077882 0.7972973 0.8125 0.79931973 0.7852349 0.80487805
|
|
0.79251701 0.78892734 0.78694158 0.80546075]
|
|
|
|
mean value: 0.7923855463549293
|
|
|
|
key: test_recall
|
|
value: [0.74193548 0.74193548 0.87096774 0.90322581 0.83870968 0.96774194
|
|
0.77419355 0.77419355 0.87096774 0.76666667]
|
|
|
|
mean value: 0.8250537634408602
|
|
|
|
key: train_recall
|
|
value: [0.86690647 0.84892086 0.84172662 0.84532374 0.84172662 0.83093525
|
|
0.8381295 0.82014388 0.82374101 0.84587814]
|
|
|
|
mean value: 0.8403432093035249
|
|
|
|
key: test_roc_auc
|
|
value: [0.75806452 0.77419355 0.82258065 0.79032258 0.77419355 0.87096774
|
|
0.77419355 0.82258065 0.75215054 0.8188172 ]
|
|
|
|
mean value: 0.7958064516129032
|
|
|
|
key: train_roc_auc
|
|
value: [0.78956835 0.81654676 0.82374101 0.81654676 0.8057554 0.8147482
|
|
0.80935252 0.80035971 0.80075939 0.82042108]
|
|
|
|
mean value: 0.8097799180010831
|
|
|
|
key: test_jcc
|
|
value: [0.60526316 0.62162162 0.71052632 0.68292683 0.65 0.78947368
|
|
0.63157895 0.68571429 0.64285714 0.67647059]
|
|
|
|
mean value: 0.6696432572959795
|
|
|
|
key: train_jcc
|
|
value: [0.67318436 0.69822485 0.70481928 0.69732938 0.68421053 0.69161677
|
|
0.68731563 0.67256637 0.67352941 0.70238095]
|
|
|
|
mean value: 0.6885177526404157
|
|
|
|
MCC on Blind test: 0.16
|
|
|
|
Accuracy on Blind test: 0.48
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01177359 0.01193714 0.01188874 0.01178646 0.01160216 0.01176119
|
|
0.01168275 0.0110743 0.01171422 0.01068854]
|
|
|
|
mean value: 0.011590909957885743
|
|
|
|
key: score_time
|
|
value: [0.010288 0.00987935 0.00976849 0.00973058 0.00971985 0.00978112
|
|
0.0097537 0.00968814 0.00973296 0.00953555]
|
|
|
|
mean value: 0.009787774085998536
|
|
|
|
key: test_mcc
|
|
value: [0.67883359 0.64549722 0.7130241 0.77459667 0.84266484 0.67741935
|
|
0.67741935 0.74193548 0.54086022 0.67858574]
|
|
|
|
mean value: 0.6970836566982328
|
|
|
|
key: train_mcc
|
|
value: [0.71230395 0.73741484 0.72313855 0.73022055 0.71949894 0.73741484
|
|
0.73741484 0.72663751 0.74147134 0.72728798]
|
|
|
|
mean value: 0.7292803358963176
|
|
|
|
key: test_accuracy
|
|
value: [0.83870968 0.82258065 0.85483871 0.88709677 0.91935484 0.83870968
|
|
0.83870968 0.87096774 0.7704918 0.83606557]
|
|
|
|
mean value: 0.8477525118984665
|
|
|
|
key: train_accuracy
|
|
value: [0.85611511 0.86870504 0.86151079 0.86510791 0.85971223 0.86870504
|
|
0.86870504 0.86330935 0.87073609 0.86355476]
|
|
|
|
mean value: 0.8646161347403226
|
|
|
|
key: test_fscore
|
|
value: [0.83333333 0.82539683 0.86153846 0.88888889 0.91525424 0.83870968
|
|
0.83870968 0.87096774 0.77419355 0.84375 ]
|
|
|
|
mean value: 0.8490742391606935
|
|
|
|
key: train_fscore
|
|
value: [0.85507246 0.86894075 0.86025408 0.86535009 0.86071429 0.86894075
|
|
0.86846847 0.86281588 0.8705036 0.86231884]
|
|
|
|
mean value: 0.8643379221459592
|
|
|
|
key: test_precision
|
|
value: [0.86206897 0.8125 0.82352941 0.875 0.96428571 0.83870968
|
|
0.83870968 0.87096774 0.77419355 0.79411765]
|
|
|
|
mean value: 0.8454082383787775
|
|
|
|
key: train_precision
|
|
value: [0.86131387 0.86738351 0.86813187 0.86379928 0.85460993 0.86738351
|
|
0.8700361 0.86594203 0.8705036 0.87179487]
|
|
|
|
mean value: 0.8660898573052462
|
|
|
|
key: test_recall
|
|
value: [0.80645161 0.83870968 0.90322581 0.90322581 0.87096774 0.83870968
|
|
0.83870968 0.87096774 0.77419355 0.9 ]
|
|
|
|
mean value: 0.854516129032258
|
|
|
|
key: train_recall
|
|
value: [0.84892086 0.8705036 0.85251799 0.86690647 0.86690647 0.8705036
|
|
0.86690647 0.85971223 0.8705036 0.85304659]
|
|
|
|
mean value: 0.8626427889946108
|
|
|
|
key: test_roc_auc
|
|
value: [0.83870968 0.82258065 0.85483871 0.88709677 0.91935484 0.83870968
|
|
0.83870968 0.87096774 0.77043011 0.83709677]
|
|
|
|
mean value: 0.8478494623655914
|
|
|
|
key: train_roc_auc
|
|
value: [0.85611511 0.86870504 0.86151079 0.86510791 0.85971223 0.86870504
|
|
0.86870504 0.86330935 0.87073567 0.86357366]
|
|
|
|
mean value: 0.8646179830329285
|
|
|
|
key: test_jcc
|
|
value: [0.71428571 0.7027027 0.75675676 0.8 0.84375 0.72222222
|
|
0.72222222 0.77142857 0.63157895 0.72972973]
|
|
|
|
mean value: 0.7394676866716341
|
|
|
|
key: train_jcc
|
|
value: [0.74683544 0.76825397 0.75477707 0.76265823 0.75548589 0.76825397
|
|
0.76751592 0.75873016 0.77070064 0.75796178]
|
|
|
|
mean value: 0.7611173073553839
|
|
|
|
MCC on Blind test: 0.17
|
|
|
|
Accuracy on Blind test: 0.49
|
|
|
|
Model_name: K-Nearest Neighbors
|
|
Model func: KNeighborsClassifier()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', KNeighborsClassifier())])
|
|
|
|
key: fit_time
|
|
value: [0.01102924 0.01101851 0.01121306 0.01111603 0.01100349 0.01029229
|
|
0.01008701 0.01012349 0.0112462 0.01112509]
|
|
|
|
mean value: 0.010825443267822265
|
|
|
|
key: score_time
|
|
value: [0.01412606 0.0150013 0.01282954 0.01299381 0.01446128 0.01321387
|
|
0.01302505 0.01303339 0.01337314 0.01329184]
|
|
|
|
mean value: 0.013534927368164062
|
|
|
|
key: test_mcc
|
|
value: [0.70116959 0.55301004 0.61807005 0.65372045 0.83914639 0.51856298
|
|
0.5809475 0.58834841 0.60818119 0.58786645]
|
|
|
|
mean value: 0.624902305420752
|
|
|
|
key: train_mcc
|
|
value: [0.7437841 0.75289533 0.74634949 0.74256537 0.73230817 0.72645594
|
|
0.75320817 0.74889946 0.76876369 0.73034296]
|
|
|
|
mean value: 0.7445572685559485
|
|
|
|
key: test_accuracy
|
|
value: [0.83870968 0.77419355 0.80645161 0.82258065 0.91935484 0.75806452
|
|
0.79032258 0.79032258 0.80327869 0.78688525]
|
|
|
|
mean value: 0.8090163934426229
|
|
|
|
key: train_accuracy
|
|
value: [0.8705036 0.87589928 0.87230216 0.8705036 0.86510791 0.86151079
|
|
0.87589928 0.87410072 0.88330341 0.86355476]
|
|
|
|
mean value: 0.8712685506890717
|
|
|
|
key: test_fscore
|
|
value: [0.81481481 0.75862069 0.79310345 0.80701754 0.91803279 0.74576271
|
|
0.79365079 0.77192982 0.8 0.75471698]
|
|
|
|
mean value: 0.7957649594699424
|
|
|
|
key: train_fscore
|
|
value: [0.86466165 0.87245841 0.86778399 0.866171 0.85981308 0.85444234
|
|
0.87198516 0.87132353 0.87850467 0.85714286]
|
|
|
|
mean value: 0.8664286698615212
|
|
|
|
key: test_precision
|
|
value: [0.95652174 0.81481481 0.85185185 0.88461538 0.93333333 0.78571429
|
|
0.78125 0.84615385 0.82758621 0.86956522]
|
|
|
|
mean value: 0.8551406679901807
|
|
|
|
key: train_precision
|
|
value: [0.90551181 0.8973384 0.8996139 0.89615385 0.89494163 0.90039841
|
|
0.90038314 0.89097744 0.91439689 0.90118577]
|
|
|
|
mean value: 0.9000901243730935
|
|
|
|
key: test_recall
|
|
value: [0.70967742 0.70967742 0.74193548 0.74193548 0.90322581 0.70967742
|
|
0.80645161 0.70967742 0.77419355 0.66666667]
|
|
|
|
mean value: 0.7473118279569892
|
|
|
|
key: train_recall
|
|
value: [0.82733813 0.84892086 0.8381295 0.8381295 0.82733813 0.81294964
|
|
0.84532374 0.85251799 0.84532374 0.8172043 ]
|
|
|
|
mean value: 0.8353175524096852
|
|
|
|
key: test_roc_auc
|
|
value: [0.83870968 0.77419355 0.80645161 0.82258065 0.91935484 0.75806452
|
|
0.79032258 0.79032258 0.80376344 0.78494624]
|
|
|
|
mean value: 0.8088709677419355
|
|
|
|
key: train_roc_auc
|
|
value: [0.8705036 0.87589928 0.87230216 0.8705036 0.86510791 0.86151079
|
|
0.87589928 0.87410072 0.88323535 0.86363812]
|
|
|
|
mean value: 0.8712700807096259
|
|
|
|
key: test_jcc
|
|
value: [0.6875 0.61111111 0.65714286 0.67647059 0.84848485 0.59459459
|
|
0.65789474 0.62857143 0.66666667 0.60606061]
|
|
|
|
mean value: 0.6634497437709512
|
|
|
|
key: train_jcc
|
|
value: [0.7615894 0.77377049 0.76644737 0.76393443 0.75409836 0.74587459
|
|
0.77302632 0.77198697 0.78333333 0.75 ]
|
|
|
|
mean value: 0.7644061258348679
|
|
|
|
MCC on Blind test: 0.17
|
|
|
|
Accuracy on Blind test: 0.54
|
|
|
|
Model_name: SVM
|
|
Model func: SVC(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SVC(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02417302 0.02532053 0.02587271 0.02416277 0.02608395 0.02859092
|
|
0.02403355 0.02894592 0.0248692 0.02357578]
|
|
|
|
mean value: 0.02556283473968506
|
|
|
|
key: score_time
|
|
value: [0.01271033 0.01297474 0.01253748 0.01260567 0.01325917 0.01347256
|
|
0.0124619 0.01322174 0.01214051 0.01210165]
|
|
|
|
mean value: 0.01274857521057129
|
|
|
|
key: test_mcc
|
|
value: [0.87278605 0.74348441 0.7190925 0.75623534 0.87096774 0.87096774
|
|
0.74193548 0.77459667 0.64178842 0.78156791]
|
|
|
|
mean value: 0.7773422266927121
|
|
|
|
key: train_mcc
|
|
value: [0.83715789 0.83214747 0.84053106 0.81773799 0.82321735 0.82227458
|
|
0.82610134 0.81986865 0.82643287 0.82557745]
|
|
|
|
mean value: 0.8271046647580531
|
|
|
|
key: test_accuracy
|
|
value: [0.93548387 0.87096774 0.85483871 0.87096774 0.93548387 0.93548387
|
|
0.87096774 0.88709677 0.81967213 0.8852459 ]
|
|
|
|
mean value: 0.8866208355367531
|
|
|
|
key: train_accuracy
|
|
value: [0.91726619 0.91546763 0.91906475 0.90827338 0.91007194 0.91007194
|
|
0.9118705 0.90827338 0.91202873 0.91202873]
|
|
|
|
mean value: 0.9124417162858582
|
|
|
|
key: test_fscore
|
|
value: [0.9375 0.875 0.86567164 0.88235294 0.93548387 0.93548387
|
|
0.87096774 0.88888889 0.83076923 0.89230769]
|
|
|
|
mean value: 0.8914425878804295
|
|
|
|
key: train_fscore
|
|
value: [0.92041522 0.91768827 0.9220104 0.91068301 0.9137931 0.91319444
|
|
0.91507799 0.91222031 0.91507799 0.91478261]
|
|
|
|
mean value: 0.9154943347587674
|
|
|
|
key: test_precision
|
|
value: [0.90909091 0.84848485 0.80555556 0.81081081 0.93548387 0.93548387
|
|
0.87096774 0.875 0.79411765 0.82857143]
|
|
|
|
mean value: 0.8613566683443343
|
|
|
|
key: train_precision
|
|
value: [0.88666667 0.89419795 0.88963211 0.88737201 0.87748344 0.88255034
|
|
0.88294314 0.87458746 0.88294314 0.88851351]
|
|
|
|
mean value: 0.8846889778724271
|
|
|
|
key: test_recall
|
|
value: [0.96774194 0.90322581 0.93548387 0.96774194 0.93548387 0.93548387
|
|
0.87096774 0.90322581 0.87096774 0.96666667]
|
|
|
|
mean value: 0.9256989247311828
|
|
|
|
key: train_recall
|
|
value: [0.95683453 0.94244604 0.95683453 0.9352518 0.95323741 0.94604317
|
|
0.94964029 0.95323741 0.94964029 0.94265233]
|
|
|
|
mean value: 0.9485817797375004
|
|
|
|
key: test_roc_auc
|
|
value: [0.93548387 0.87096774 0.85483871 0.87096774 0.93548387 0.93548387
|
|
0.87096774 0.88709677 0.8188172 0.88655914]
|
|
|
|
mean value: 0.8866666666666667
|
|
|
|
key: train_roc_auc
|
|
value: [0.91726619 0.91546763 0.91906475 0.90827338 0.91007194 0.91007194
|
|
0.9118705 0.90827338 0.91209613 0.91197365]
|
|
|
|
mean value: 0.9124429488667131
|
|
|
|
key: test_jcc
|
|
value: [0.88235294 0.77777778 0.76315789 0.78947368 0.87878788 0.87878788
|
|
0.77142857 0.8 0.71052632 0.80555556]
|
|
|
|
mean value: 0.8057848498250975
|
|
|
|
key: train_jcc
|
|
value: [0.8525641 0.84789644 0.85530547 0.83601286 0.84126984 0.84025559
|
|
0.84345048 0.83860759 0.84345048 0.84294872]
|
|
|
|
mean value: 0.8441761574343863
|
|
|
|
MCC on Blind test: 0.23
|
|
|
|
Accuracy on Blind test: 0.47
|
|
|
|
Model_name: MLP
|
|
Model func: MLPClassifier(max_iter=500, random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MLPClassifier(max_iter=500, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.69970942 2.01441073 2.02653909 2.06026721 2.04769754 2.04136062
|
|
1.92918682 1.99944806 1.96888709 2.03387642]
|
|
|
|
mean value: 1.9821382999420165
|
|
|
|
key: score_time
|
|
value: [0.01246619 0.02080941 0.01246428 0.0147903 0.0124898 0.02321792
|
|
0.01491332 0.02058625 0.01244164 0.01501036]
|
|
|
|
mean value: 0.015918946266174315
|
|
|
|
key: test_mcc
|
|
value: [0.87278605 0.84266484 0.80813523 0.87278605 0.90369611 0.87096774
|
|
0.80645161 0.90748521 0.67204301 0.8688172 ]
|
|
|
|
mean value: 0.8425833063747562
|
|
|
|
key: train_mcc
|
|
value: [0.99280576 0.98921503 0.99280576 0.98921503 0.98921503 0.99640932
|
|
0.99280576 0.99640932 0.98923428 1. ]
|
|
|
|
mean value: 0.9928115293600994
|
|
|
|
key: test_accuracy
|
|
value: [0.93548387 0.91935484 0.90322581 0.93548387 0.9516129 0.93548387
|
|
0.90322581 0.9516129 0.83606557 0.93442623]
|
|
|
|
mean value: 0.920597567424643
|
|
|
|
key: train_accuracy
|
|
value: [0.99640288 0.99460432 0.99640288 0.99460432 0.99460432 0.99820144
|
|
0.99640288 0.99820144 0.994614 1. ]
|
|
|
|
mean value: 0.9964038464022319
|
|
|
|
key: test_fscore
|
|
value: [0.9375 0.92307692 0.90625 0.9375 0.95238095 0.93548387
|
|
0.90322581 0.94915254 0.83870968 0.93333333]
|
|
|
|
mean value: 0.9216613106002799
|
|
|
|
key: train_fscore
|
|
value: [0.99640288 0.99459459 0.99640288 0.99459459 0.99459459 0.99820467
|
|
0.99640288 0.99820467 0.99459459 1. ]
|
|
|
|
mean value: 0.9963996347199013
|
|
|
|
key: test_precision
|
|
value: [0.90909091 0.88235294 0.87878788 0.90909091 0.9375 0.93548387
|
|
0.90322581 1. 0.83870968 0.93333333]
|
|
|
|
mean value: 0.912757532631821
|
|
|
|
key: train_precision
|
|
value: [0.99640288 0.99638989 0.99640288 0.99638989 0.99638989 0.99641577
|
|
0.99640288 0.99641577 0.99638989 1. ]
|
|
|
|
mean value: 0.9967599741099167
|
|
|
|
key: test_recall
|
|
value: [0.96774194 0.96774194 0.93548387 0.96774194 0.96774194 0.93548387
|
|
0.90322581 0.90322581 0.83870968 0.93333333]
|
|
|
|
mean value: 0.9320430107526881
|
|
|
|
key: train_recall
|
|
value: [0.99640288 0.99280576 0.99640288 0.99280576 0.99280576 1.
|
|
0.99640288 1. 0.99280576 1. ]
|
|
|
|
mean value: 0.996043165467626
|
|
|
|
key: test_roc_auc
|
|
value: [0.93548387 0.91935484 0.90322581 0.93548387 0.9516129 0.93548387
|
|
0.90322581 0.9516129 0.83602151 0.9344086 ]
|
|
|
|
mean value: 0.9205913978494624
|
|
|
|
key: train_roc_auc
|
|
value: [0.99640288 0.99460432 0.99640288 0.99460432 0.99460432 0.99820144
|
|
0.99640288 0.99820144 0.99461076 1. ]
|
|
|
|
mean value: 0.9964035223434156
|
|
|
|
key: test_jcc
|
|
value: [0.88235294 0.85714286 0.82857143 0.88235294 0.90909091 0.87878788
|
|
0.82352941 0.90322581 0.72222222 0.875 ]
|
|
|
|
mean value: 0.8562276396384556
|
|
|
|
key: train_jcc
|
|
value: [0.99283154 0.98924731 0.99283154 0.98924731 0.98924731 0.99641577
|
|
0.99283154 0.99641577 0.98924731 1. ]
|
|
|
|
mean value: 0.992831541218638
|
|
|
|
MCC on Blind test: 0.18
|
|
|
|
Accuracy on Blind test: 0.53
|
|
|
|
Model_name: Decision Tree
|
|
Model func: DecisionTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', DecisionTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.03310966 0.02370453 0.02852869 0.024019 0.02490878 0.02375698
|
|
0.0259831 0.02331567 0.02899504 0.02387166]
|
|
|
|
mean value: 0.02601931095123291
|
|
|
|
key: score_time
|
|
value: [0.01206684 0.00918984 0.00911832 0.00887275 0.00887132 0.00887656
|
|
0.00896263 0.00905371 0.00915074 0.00887108]
|
|
|
|
mean value: 0.00930337905883789
|
|
|
|
key: test_mcc
|
|
value: [0.93548387 0.93548387 1. 0.87278605 0.96824584 0.96824584
|
|
0.87096774 0.90748521 0.93649139 0.83655914]
|
|
|
|
mean value: 0.9231748950840374
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.96774194 0.96774194 1. 0.93548387 0.98387097 0.98387097
|
|
0.93548387 0.9516129 0.96721311 0.91803279]
|
|
|
|
mean value: 0.9611052353252247
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.96774194 0.96774194 1. 0.9375 0.98360656 0.98360656
|
|
0.93548387 0.94915254 0.96666667 0.91803279]
|
|
|
|
mean value: 0.9609532852614375
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.96774194 0.96774194 1. 0.90909091 1. 1.
|
|
0.93548387 1. 1. 0.90322581]
|
|
|
|
mean value: 0.9683284457478005
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.96774194 0.96774194 1. 0.96774194 0.96774194 0.96774194
|
|
0.93548387 0.90322581 0.93548387 0.93333333]
|
|
|
|
mean value: 0.9546236559139785
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.96774194 0.96774194 1. 0.93548387 0.98387097 0.98387097
|
|
0.93548387 0.9516129 0.96774194 0.91827957]
|
|
|
|
mean value: 0.9611827956989247
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.9375 0.9375 1. 0.88235294 0.96774194 0.96774194
|
|
0.87878788 0.90322581 0.93548387 0.84848485]
|
|
|
|
mean value: 0.9258819216836295
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.12
|
|
|
|
Accuracy on Blind test: 0.68
|
|
|
|
Model_name: Extra Trees
|
|
Model func: ExtraTreesClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreesClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.130229 0.126683 0.12822556 0.12735248 0.12751722 0.12969017
|
|
0.12745762 0.12832069 0.12567377 0.12686229]
|
|
|
|
mean value: 0.12780117988586426
|
|
|
|
key: score_time
|
|
value: [0.01785779 0.01803756 0.017869 0.01788187 0.01802921 0.01807475
|
|
0.01809239 0.01788807 0.01800728 0.0179708 ]
|
|
|
|
mean value: 0.017970871925354005
|
|
|
|
key: test_mcc
|
|
value: [0.93548387 0.80813523 0.80813523 0.83914639 0.93548387 0.83914639
|
|
0.74348441 0.93743687 0.77072165 0.90215054]
|
|
|
|
mean value: 0.8519324452598774
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.96774194 0.90322581 0.90322581 0.91935484 0.96774194 0.91935484
|
|
0.87096774 0.96774194 0.8852459 0.95081967]
|
|
|
|
mean value: 0.9255420412480169
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.96774194 0.90625 0.90625 0.92063492 0.96774194 0.91803279
|
|
0.875 0.96666667 0.88888889 0.95081967]
|
|
|
|
mean value: 0.9268026806174612
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.96774194 0.87878788 0.87878788 0.90625 0.96774194 0.93333333
|
|
0.84848485 1. 0.875 0.93548387]
|
|
|
|
mean value: 0.9191611681329424
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.96774194 0.93548387 0.93548387 0.93548387 0.96774194 0.90322581
|
|
0.90322581 0.93548387 0.90322581 0.96666667]
|
|
|
|
mean value: 0.9353763440860214
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.96774194 0.90322581 0.90322581 0.91935484 0.96774194 0.91935484
|
|
0.87096774 0.96774194 0.88494624 0.95107527]
|
|
|
|
mean value: 0.9255376344086022
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.9375 0.82857143 0.82857143 0.85294118 0.9375 0.84848485
|
|
0.77777778 0.93548387 0.8 0.90625 ]
|
|
|
|
mean value: 0.8653080530843814
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.15
|
|
|
|
Accuracy on Blind test: 0.41
|
|
|
|
Model_name: Extra Tree
|
|
Model func: ExtraTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01067162 0.01046467 0.01075888 0.01043582 0.01073885 0.01057148
|
|
0.0104928 0.01046562 0.01066518 0.0106113 ]
|
|
|
|
mean value: 0.010587620735168456
|
|
|
|
key: score_time
|
|
value: [0.00888348 0.00908494 0.00885534 0.00883126 0.00887513 0.00885701
|
|
0.00883675 0.00886393 0.0088141 0.00885773]
|
|
|
|
mean value: 0.00887596607208252
|
|
|
|
key: test_mcc
|
|
value: [0.74348441 0.64820372 0.45374261 0.74193548 0.58834841 0.54953196
|
|
0.61418277 0.61807005 0.57419355 0.64708149]
|
|
|
|
mean value: 0.6178774447532813
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.87096774 0.82258065 0.72580645 0.87096774 0.79032258 0.77419355
|
|
0.80645161 0.80645161 0.78688525 0.81967213]
|
|
|
|
mean value: 0.8074299312533051
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.875 0.83076923 0.71186441 0.87096774 0.80597015 0.76666667
|
|
0.8125 0.79310345 0.78688525 0.8 ]
|
|
|
|
mean value: 0.8053726889582276
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.84848485 0.79411765 0.75 0.87096774 0.75 0.79310345
|
|
0.78787879 0.85185185 0.8 0.88 ]
|
|
|
|
mean value: 0.8126404325485658
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.90322581 0.87096774 0.67741935 0.87096774 0.87096774 0.74193548
|
|
0.83870968 0.74193548 0.77419355 0.73333333]
|
|
|
|
mean value: 0.8023655913978495
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.87096774 0.82258065 0.72580645 0.87096774 0.79032258 0.77419355
|
|
0.80645161 0.80645161 0.78709677 0.81827957]
|
|
|
|
mean value: 0.8073118279569893
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.77777778 0.71052632 0.55263158 0.77142857 0.675 0.62162162
|
|
0.68421053 0.65714286 0.64864865 0.66666667]
|
|
|
|
mean value: 0.6765654564338774
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.15
|
|
|
|
Accuracy on Blind test: 0.48
|
|
|
|
Model_name: Random Forest
|
|
Model func: RandomForestClassifier(n_estimators=1000, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(n_estimators=1000, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [2.01657772 2.0090251 2.03078365 2.00115323 2.0330627 2.06364393
|
|
2.02089286 2.09506607 2.07742858 2.05084038]
|
|
|
|
mean value: 2.039847421646118
|
|
|
|
key: score_time
|
|
value: [0.09266973 0.09811974 0.09791493 0.0990932 0.09410357 0.09961629
|
|
0.09475183 0.10066032 0.10026288 0.10025406]
|
|
|
|
mean value: 0.09774465560913086
|
|
|
|
key: test_mcc
|
|
value: [0.96824584 0.87278605 0.93743687 0.84266484 0.96824584 1.
|
|
0.83914639 0.96824584 0.93635873 0.83655914]
|
|
|
|
mean value: 0.9169689529423093
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.98387097 0.93548387 0.96774194 0.91935484 0.98387097 1.
|
|
0.91935484 0.98387097 0.96721311 0.91803279]
|
|
|
|
mean value: 0.9578794288736119
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.98412698 0.9375 0.96875 0.92307692 0.98412698 1.
|
|
0.92063492 0.98360656 0.96875 0.91803279]
|
|
|
|
mean value: 0.9588605156228107
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.96875 0.90909091 0.93939394 0.88235294 0.96875 1.
|
|
0.90625 1. 0.93939394 0.90322581]
|
|
|
|
mean value: 0.9417207535506872
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 0.96774194 1. 0.96774194 1. 1.
|
|
0.93548387 0.96774194 1. 0.93333333]
|
|
|
|
mean value: 0.9772043010752688
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.98387097 0.93548387 0.96774194 0.91935484 0.98387097 1.
|
|
0.91935484 0.98387097 0.96666667 0.91827957]
|
|
|
|
mean value: 0.9578494623655914
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.96875 0.88235294 0.93939394 0.85714286 0.96875 1.
|
|
0.85294118 0.96774194 0.93939394 0.84848485]
|
|
|
|
mean value: 0.9224951637546515
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.13
|
|
|
|
Accuracy on Blind test: 0.37
|
|
|
|
Model_name: Random Forest2
|
|
Model func: RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000...05', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10,
|
|
oob_score=True, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.00179338 1.07502389 1.05377007 0.99681354 1.01473498 1.02150321
|
|
0.98504329 1.10019326 1.03762698 1.07313776]
|
|
|
|
mean value: 1.035964035987854
|
|
|
|
key: score_time
|
|
value: [0.27896857 0.23714018 0.17314553 0.26825953 0.26656032 0.25020838
|
|
0.22471714 0.17120504 0.24397707 0.22602963]
|
|
|
|
mean value: 0.23402113914489747
|
|
|
|
key: test_mcc
|
|
value: [1. 0.83914639 0.93743687 0.87831007 0.93743687 0.96824584
|
|
0.87096774 0.96824584 0.93635873 0.83655914]
|
|
|
|
mean value: 0.9172707478226112
|
|
|
|
key: train_mcc
|
|
value: [0.96778244 0.96778244 0.96778244 0.97487691 0.97132357 0.97487691
|
|
0.97132357 0.97851856 0.96080787 0.97502162]
|
|
|
|
mean value: 0.9710096336881678
|
|
|
|
key: test_accuracy
|
|
value: [1. 0.91935484 0.96774194 0.93548387 0.96774194 0.98387097
|
|
0.93548387 0.98387097 0.96721311 0.91803279]
|
|
|
|
mean value: 0.9578794288736119
|
|
|
|
key: train_accuracy
|
|
value: [0.98381295 0.98381295 0.98381295 0.98741007 0.98561151 0.98741007
|
|
0.98561151 0.98920863 0.98025135 0.98743268]
|
|
|
|
mean value: 0.9854374669026
|
|
|
|
key: test_fscore
|
|
value: [1. 0.92063492 0.96875 0.93939394 0.96875 0.98412698
|
|
0.93548387 0.98360656 0.96875 0.91803279]
|
|
|
|
mean value: 0.9587529059385881
|
|
|
|
key: train_fscore
|
|
value: [0.98395722 0.98395722 0.98395722 0.98747764 0.98571429 0.98747764
|
|
0.98571429 0.98928571 0.98046181 0.98756661]
|
|
|
|
mean value: 0.9855569639932104
|
|
|
|
key: test_precision
|
|
value: [1. 0.90625 0.93939394 0.88571429 0.93939394 0.96875
|
|
0.93548387 1. 0.93939394 0.90322581]
|
|
|
|
mean value: 0.9417605781315459
|
|
|
|
key: train_precision
|
|
value: [0.97526502 0.97526502 0.97526502 0.98220641 0.9787234 0.98220641
|
|
0.9787234 0.9822695 0.96842105 0.97887324]
|
|
|
|
mean value: 0.977721846851637
|
|
|
|
key: test_recall
|
|
value: [1. 0.93548387 1. 1. 1. 1.
|
|
0.93548387 0.96774194 1. 0.93333333]
|
|
|
|
mean value: 0.9772043010752688
|
|
|
|
key: train_recall
|
|
value: [0.99280576 0.99280576 0.99280576 0.99280576 0.99280576 0.99280576
|
|
0.99280576 0.99640288 0.99280576 0.99641577]
|
|
|
|
mean value: 0.9935264691472628
|
|
|
|
key: test_roc_auc
|
|
value: [1. 0.91935484 0.96774194 0.93548387 0.96774194 0.98387097
|
|
0.93548387 0.98387097 0.96666667 0.91827957]
|
|
|
|
mean value: 0.9578494623655914
|
|
|
|
key: train_roc_auc
|
|
value: [0.98381295 0.98381295 0.98381295 0.98741007 0.98561151 0.98741007
|
|
0.98561151 0.98920863 0.98027385 0.98741652]
|
|
|
|
mean value: 0.9854381011319977
|
|
|
|
key: test_jcc
|
|
value: [1. 0.85294118 0.93939394 0.88571429 0.93939394 0.96875
|
|
0.87878788 0.96774194 0.93939394 0.84848485]
|
|
|
|
mean value: 0.922060194312329
|
|
|
|
key: train_jcc
|
|
value: [0.96842105 0.96842105 0.96842105 0.97526502 0.97183099 0.97526502
|
|
0.97183099 0.97879859 0.96167247 0.9754386 ]
|
|
|
|
mean value: 0.9715364821992674
|
|
|
|
MCC on Blind test: 0.17
|
|
|
|
Accuracy on Blind test: 0.41
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.02595091 0.01143193 0.01075721 0.0116322 0.01162744 0.01118588
|
|
0.01176763 0.01181078 0.01117778 0.01176858]
|
|
|
|
mean value: 0.012911033630371094
|
|
|
|
key: score_time
|
|
value: [0.01068473 0.00950789 0.00947976 0.00941181 0.00950432 0.00955606
|
|
0.00921035 0.00912285 0.00968337 0.0091567 ]
|
|
|
|
mean value: 0.009531784057617187
|
|
|
|
key: test_mcc
|
|
value: [0.67883359 0.64549722 0.7130241 0.77459667 0.84266484 0.67741935
|
|
0.67741935 0.74193548 0.54086022 0.67858574]
|
|
|
|
mean value: 0.6970836566982328
|
|
|
|
key: train_mcc
|
|
value: [0.71230395 0.73741484 0.72313855 0.73022055 0.71949894 0.73741484
|
|
0.73741484 0.72663751 0.74147134 0.72728798]
|
|
|
|
mean value: 0.7292803358963176
|
|
|
|
key: test_accuracy
|
|
value: [0.83870968 0.82258065 0.85483871 0.88709677 0.91935484 0.83870968
|
|
0.83870968 0.87096774 0.7704918 0.83606557]
|
|
|
|
mean value: 0.8477525118984665
|
|
|
|
key: train_accuracy
|
|
value: [0.85611511 0.86870504 0.86151079 0.86510791 0.85971223 0.86870504
|
|
0.86870504 0.86330935 0.87073609 0.86355476]
|
|
|
|
mean value: 0.8646161347403226
|
|
|
|
key: test_fscore
|
|
value: [0.83333333 0.82539683 0.86153846 0.88888889 0.91525424 0.83870968
|
|
0.83870968 0.87096774 0.77419355 0.84375 ]
|
|
|
|
mean value: 0.8490742391606935
|
|
|
|
key: train_fscore
|
|
value: [0.85507246 0.86894075 0.86025408 0.86535009 0.86071429 0.86894075
|
|
0.86846847 0.86281588 0.8705036 0.86231884]
|
|
|
|
mean value: 0.8643379221459592
|
|
|
|
key: test_precision
|
|
value: [0.86206897 0.8125 0.82352941 0.875 0.96428571 0.83870968
|
|
0.83870968 0.87096774 0.77419355 0.79411765]
|
|
|
|
mean value: 0.8454082383787775
|
|
|
|
key: train_precision
|
|
value: [0.86131387 0.86738351 0.86813187 0.86379928 0.85460993 0.86738351
|
|
0.8700361 0.86594203 0.8705036 0.87179487]
|
|
|
|
mean value: 0.8660898573052462
|
|
|
|
key: test_recall
|
|
value: [0.80645161 0.83870968 0.90322581 0.90322581 0.87096774 0.83870968
|
|
0.83870968 0.87096774 0.77419355 0.9 ]
|
|
|
|
mean value: 0.854516129032258
|
|
|
|
key: train_recall
|
|
value: [0.84892086 0.8705036 0.85251799 0.86690647 0.86690647 0.8705036
|
|
0.86690647 0.85971223 0.8705036 0.85304659]
|
|
|
|
mean value: 0.8626427889946108
|
|
|
|
key: test_roc_auc
|
|
value: [0.83870968 0.82258065 0.85483871 0.88709677 0.91935484 0.83870968
|
|
0.83870968 0.87096774 0.77043011 0.83709677]
|
|
|
|
mean value: 0.8478494623655914
|
|
|
|
key: train_roc_auc
|
|
value: [0.85611511 0.86870504 0.86151079 0.86510791 0.85971223 0.86870504
|
|
0.86870504 0.86330935 0.87073567 0.86357366]
|
|
|
|
mean value: 0.8646179830329285
|
|
|
|
key: test_jcc
|
|
value: [0.71428571 0.7027027 0.75675676 0.8 0.84375 0.72222222
|
|
0.72222222 0.77142857 0.63157895 0.72972973]
|
|
|
|
mean value: 0.7394676866716341
|
|
|
|
key: train_jcc
|
|
value: [0.74683544 0.76825397 0.75477707 0.76265823 0.75548589 0.76825397
|
|
0.76751592 0.75873016 0.77070064 0.75796178]
|
|
|
|
mean value: 0.7611173073553839
|
|
|
|
MCC on Blind test: 0.17
|
|
|
|
Accuracy on Blind test: 0.49
|
|
|
|
Model_name: XGBoost
|
|
Model func: XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000...
|
|
interaction_constraints=None, learning_rate=None,
|
|
max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan,
|
|
monotone_constraints=None, n_estimators=100,
|
|
n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None,
|
|
reg_lambda=None, scale_pos_weight=None,
|
|
subsample=None, tree_method=None,
|
|
use_label_encoder=False,
|
|
validate_parameters=None, verbosity=0))])
|
|
|
|
key: fit_time
|
|
value: [0.08813882 0.076159 0.07938814 0.12023497 0.13540554 0.10756207
|
|
0.07338667 0.0774343 0.07803369 0.07466531]
|
|
|
|
mean value: 0.09104084968566895
|
|
|
|
key: score_time
|
|
value: [0.01114702 0.01104593 0.01131225 0.01271844 0.01222229 0.01108837
|
|
0.01109123 0.01365948 0.01104116 0.01136756]
|
|
|
|
mean value: 0.011669373512268067
|
|
|
|
key: test_mcc
|
|
value: [0.96824584 0.93548387 0.96824584 0.87831007 0.96824584 0.93743687
|
|
1. 0.96824584 1. 0.8688172 ]
|
|
|
|
mean value: 0.9493031353691006
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.98387097 0.96774194 0.98387097 0.93548387 0.98387097 0.96774194
|
|
1. 0.98387097 1. 0.93442623]
|
|
|
|
mean value: 0.9740877842411423
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.98412698 0.96774194 0.98412698 0.93939394 0.98412698 0.96666667
|
|
1. 0.98360656 1. 0.93333333]
|
|
|
|
mean value: 0.9743123384635811
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.96875 0.96774194 0.96875 0.88571429 0.96875 1.
|
|
1. 1. 1. 0.93333333]
|
|
|
|
mean value: 0.969303955453149
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 0.96774194 1. 1. 1. 0.93548387
|
|
1. 0.96774194 1. 0.93333333]
|
|
|
|
mean value: 0.9804301075268818
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.98387097 0.96774194 0.98387097 0.93548387 0.98387097 0.96774194
|
|
1. 0.98387097 1. 0.9344086 ]
|
|
|
|
mean value: 0.9740860215053764
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.96875 0.9375 0.96875 0.88571429 0.96875 0.93548387
|
|
1. 0.96774194 1. 0.875 ]
|
|
|
|
mean value: 0.9507690092165899
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.14
|
|
|
|
Accuracy on Blind test: 0.67
|
|
|
|
Model_name: LDA
|
|
Model func: LinearDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LinearDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.04738498 0.06282306 0.06595659 0.04460764 0.08294272 0.07518077
|
|
0.05843568 0.09132147 0.07124734 0.05581951]
|
|
|
|
mean value: 0.06557197570800781
|
|
|
|
key: score_time
|
|
value: [0.01911306 0.01217628 0.02016044 0.01214337 0.01833034 0.01263642
|
|
0.01912284 0.02297115 0.0121851 0.01308465]
|
|
|
|
mean value: 0.016192364692687988
|
|
|
|
key: test_mcc
|
|
value: [0.83914639 0.83914639 0.90748521 0.81325006 0.90369611 0.96824584
|
|
0.96824584 0.90748521 0.9344086 0.80475071]
|
|
|
|
mean value: 0.8885860367733198
|
|
|
|
key: train_mcc
|
|
value: [0.96425338 0.96768225 0.97132357 0.97124816 0.96412858 0.96768225
|
|
0.96412858 0.96778244 0.97137553 0.98210326]
|
|
|
|
mean value: 0.9691707996843315
|
|
|
|
key: test_accuracy
|
|
value: [0.91935484 0.91935484 0.9516129 0.90322581 0.9516129 0.98387097
|
|
0.98387097 0.9516129 0.96721311 0.90163934]
|
|
|
|
mean value: 0.9433368588048652
|
|
|
|
key: train_accuracy
|
|
value: [0.98201439 0.98381295 0.98561151 0.98561151 0.98201439 0.98381295
|
|
0.98201439 0.98381295 0.98563734 0.99102334]
|
|
|
|
mean value: 0.9845365718197435
|
|
|
|
key: test_fscore
|
|
value: [0.92063492 0.91803279 0.95384615 0.90909091 0.95238095 0.98360656
|
|
0.98360656 0.94915254 0.96774194 0.89655172]
|
|
|
|
mean value: 0.9434645039586963
|
|
|
|
key: train_fscore
|
|
value: [0.98220641 0.98389982 0.98571429 0.98566308 0.98214286 0.98389982
|
|
0.98214286 0.98395722 0.98571429 0.99108734]
|
|
|
|
mean value: 0.9846427979343616
|
|
|
|
key: test_precision
|
|
value: [0.90625 0.93333333 0.91176471 0.85714286 0.9375 1.
|
|
1. 1. 0.96774194 0.92857143]
|
|
|
|
mean value: 0.9442304260413843
|
|
|
|
key: train_precision
|
|
value: [0.97183099 0.97864769 0.9787234 0.98214286 0.9751773 0.97864769
|
|
0.9751773 0.97526502 0.9787234 0.9858156 ]
|
|
|
|
mean value: 0.978015125566827
|
|
|
|
key: test_recall
|
|
value: [0.93548387 0.90322581 1. 0.96774194 0.96774194 0.96774194
|
|
0.96774194 0.90322581 0.96774194 0.86666667]
|
|
|
|
mean value: 0.9447311827956989
|
|
|
|
key: train_recall
|
|
value: [0.99280576 0.98920863 0.99280576 0.98920863 0.98920863 0.98920863
|
|
0.98920863 0.99280576 0.99280576 0.99641577]
|
|
|
|
mean value: 0.9913681957659679
|
|
|
|
key: test_roc_auc
|
|
value: [0.91935484 0.91935484 0.9516129 0.90322581 0.9516129 0.98387097
|
|
0.98387097 0.9516129 0.9672043 0.90107527]
|
|
|
|
mean value: 0.9432795698924732
|
|
|
|
key: train_roc_auc
|
|
value: [0.98201439 0.98381295 0.98561151 0.98561151 0.98201439 0.98381295
|
|
0.98201439 0.98381295 0.98565019 0.99101364]
|
|
|
|
mean value: 0.9845368866197365
|
|
|
|
key: test_jcc
|
|
value: [0.85294118 0.84848485 0.91176471 0.83333333 0.90909091 0.96774194
|
|
0.96774194 0.90322581 0.9375 0.8125 ]
|
|
|
|
mean value: 0.8944324650681387
|
|
|
|
key: train_jcc
|
|
value: [0.96503497 0.96830986 0.97183099 0.97173145 0.96491228 0.96830986
|
|
0.96491228 0.96842105 0.97183099 0.98233216]
|
|
|
|
mean value: 0.9697625873451181
|
|
|
|
MCC on Blind test: 0.16
|
|
|
|
Accuracy on Blind test: 0.42
|
|
|
|
Model_name: Multinomial
|
|
Model func: MultinomialNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MultinomialNB())])
|
|
|
|
key: fit_time
|
|
value: [0.0249753 0.01080441 0.01006317 0.0100286 0.01024675 0.01045632
|
|
0.01094842 0.01116395 0.01117063 0.01019931]
|
|
|
|
mean value: 0.01200568675994873
|
|
|
|
key: score_time
|
|
value: [0.00920653 0.00890374 0.00869942 0.00879359 0.00906634 0.00899959
|
|
0.00966144 0.00934982 0.00933647 0.0087049 ]
|
|
|
|
mean value: 0.009072184562683105
|
|
|
|
key: test_mcc
|
|
value: [0.74348441 0.61290323 0.67741935 0.7130241 0.74193548 0.84266484
|
|
0.54953196 0.67883359 0.50975101 0.74460444]
|
|
|
|
mean value: 0.6814152411688326
|
|
|
|
key: train_mcc
|
|
value: [0.66968894 0.7019886 0.70900474 0.68709037 0.69093363 0.70166132
|
|
0.70180672 0.69093363 0.70939248 0.66609934]
|
|
|
|
mean value: 0.6928599755400829
|
|
|
|
key: test_accuracy
|
|
value: [0.87096774 0.80645161 0.83870968 0.85483871 0.87096774 0.91935484
|
|
0.77419355 0.83870968 0.75409836 0.86885246]
|
|
|
|
mean value: 0.8397144368059228
|
|
|
|
key: train_accuracy
|
|
value: [0.83453237 0.85071942 0.85431655 0.84352518 0.84532374 0.85071942
|
|
0.85071942 0.84532374 0.8545781 0.83303411]
|
|
|
|
mean value: 0.8462792064373635
|
|
|
|
key: test_fscore
|
|
value: [0.86666667 0.80645161 0.83870968 0.86153846 0.87096774 0.92307692
|
|
0.76666667 0.84375 0.76923077 0.875 ]
|
|
|
|
mean value: 0.8422058519437552
|
|
|
|
key: train_fscore
|
|
value: [0.83802817 0.85361552 0.85663717 0.84436494 0.84751773 0.85257549
|
|
0.85309735 0.84751773 0.85612789 0.8342246 ]
|
|
|
|
mean value: 0.8483706574660165
|
|
|
|
key: test_precision
|
|
value: [0.89655172 0.80645161 0.83870968 0.82352941 0.87096774 0.88235294
|
|
0.79310345 0.81818182 0.73529412 0.82352941]
|
|
|
|
mean value: 0.8288671905206617
|
|
|
|
key: train_precision
|
|
value: [0.82068966 0.83737024 0.84320557 0.83985765 0.83566434 0.84210526
|
|
0.83972125 0.83566434 0.84561404 0.82978723]
|
|
|
|
mean value: 0.836967958151763
|
|
|
|
key: test_recall
|
|
value: [0.83870968 0.80645161 0.83870968 0.90322581 0.87096774 0.96774194
|
|
0.74193548 0.87096774 0.80645161 0.93333333]
|
|
|
|
mean value: 0.8578494623655915
|
|
|
|
key: train_recall
|
|
value: [0.85611511 0.8705036 0.8705036 0.84892086 0.85971223 0.86330935
|
|
0.86690647 0.85971223 0.86690647 0.83870968]
|
|
|
|
mean value: 0.8601299605476909
|
|
|
|
key: test_roc_auc
|
|
value: [0.87096774 0.80645161 0.83870968 0.85483871 0.87096774 0.91935484
|
|
0.77419355 0.83870968 0.75322581 0.86989247]
|
|
|
|
mean value: 0.8397311827956989
|
|
|
|
key: train_roc_auc
|
|
value: [0.83453237 0.85071942 0.85431655 0.84352518 0.84532374 0.85071942
|
|
0.85071942 0.84532374 0.85460019 0.8330239 ]
|
|
|
|
mean value: 0.8462803950388076
|
|
|
|
key: test_jcc
|
|
value: [0.76470588 0.67567568 0.72222222 0.75675676 0.77142857 0.85714286
|
|
0.62162162 0.72972973 0.625 0.77777778]
|
|
|
|
mean value: 0.7302061094708153
|
|
|
|
key: train_jcc
|
|
value: [0.72121212 0.74461538 0.74922601 0.73065015 0.73538462 0.74303406
|
|
0.74382716 0.73538462 0.7484472 0.71559633]
|
|
|
|
mean value: 0.7367377649053004
|
|
|
|
MCC on Blind test: 0.19
|
|
|
|
Accuracy on Blind test: 0.54
|
|
|
|
Model_name: Passive Aggresive
|
|
Model func: PassiveAggressiveClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
PassiveAggressiveClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02157235 0.02386403 0.02440476 0.0301435 0.02720165 0.02603769
|
|
0.02656388 0.02754545 0.03107619 0.03502512]
|
|
|
|
mean value: 0.02734346389770508
|
|
|
|
key: score_time
|
|
value: [0.01022172 0.01124573 0.01181793 0.01186442 0.01206994 0.01182413
|
|
0.01185656 0.01193142 0.0118835 0.01200509]
|
|
|
|
mean value: 0.011672043800354004
|
|
|
|
key: test_mcc
|
|
value: [0.96824584 0.87096774 0.90369611 0.87831007 0.90369611 0.78446454
|
|
0.87278605 0.90369611 0.77072165 0.8688172 ]
|
|
|
|
mean value: 0.8725401431630221
|
|
|
|
key: train_mcc
|
|
value: [0.97132357 0.93585746 0.93644001 0.96412858 0.88878772 0.87468815
|
|
0.94305636 0.91941603 0.92561092 0.97492135]
|
|
|
|
mean value: 0.9334230161428768
|
|
|
|
key: test_accuracy
|
|
value: [0.98387097 0.93548387 0.9516129 0.93548387 0.9516129 0.88709677
|
|
0.93548387 0.9516129 0.8852459 0.93442623]
|
|
|
|
mean value: 0.935193019566367
|
|
|
|
key: train_accuracy
|
|
value: [0.98561151 0.9676259 0.9676259 0.98201439 0.94244604 0.93345324
|
|
0.97122302 0.95863309 0.96229803 0.98743268]
|
|
|
|
mean value: 0.9658363793704713
|
|
|
|
key: test_fscore
|
|
value: [0.98360656 0.93548387 0.95238095 0.93939394 0.95081967 0.89552239
|
|
0.9375 0.95081967 0.88888889 0.93333333]
|
|
|
|
mean value: 0.9367749274663901
|
|
|
|
key: train_fscore
|
|
value: [0.98550725 0.96703297 0.96842105 0.98214286 0.93962264 0.9376054
|
|
0.97173145 0.96 0.96309315 0.98752228]
|
|
|
|
mean value: 0.9662679037256826
|
|
|
|
key: test_precision
|
|
value: [1. 0.93548387 0.9375 0.88571429 0.96666667 0.83333333
|
|
0.90909091 0.96666667 0.875 0.93333333]
|
|
|
|
mean value: 0.9242789065772936
|
|
|
|
key: train_precision
|
|
value: [0.99270073 0.98507463 0.94520548 0.9751773 0.98809524 0.88253968
|
|
0.95486111 0.92929293 0.94158076 0.9822695 ]
|
|
|
|
mean value: 0.9576797361808079
|
|
|
|
key: test_recall
|
|
value: [0.96774194 0.93548387 0.96774194 1. 0.93548387 0.96774194
|
|
0.96774194 0.93548387 0.90322581 0.93333333]
|
|
|
|
mean value: 0.9513978494623656
|
|
|
|
key: train_recall
|
|
value: [0.97841727 0.94964029 0.99280576 0.98920863 0.89568345 1.
|
|
0.98920863 0.99280576 0.98561151 0.99283154]
|
|
|
|
mean value: 0.9766212836182667
|
|
|
|
key: test_roc_auc
|
|
value: [0.98387097 0.93548387 0.9516129 0.93548387 0.9516129 0.88709677
|
|
0.93548387 0.9516129 0.88494624 0.9344086 ]
|
|
|
|
mean value: 0.9351612903225808
|
|
|
|
key: train_roc_auc
|
|
value: [0.98561151 0.9676259 0.9676259 0.98201439 0.94244604 0.93345324
|
|
0.97122302 0.95863309 0.96233981 0.98742296]
|
|
|
|
mean value: 0.9658395863953998
|
|
|
|
key: test_jcc
|
|
value: [0.96774194 0.87878788 0.90909091 0.88571429 0.90625 0.81081081
|
|
0.88235294 0.90625 0.8 0.875 ]
|
|
|
|
mean value: 0.8821998761064226
|
|
|
|
key: train_jcc
|
|
value: [0.97142857 0.93617021 0.93877551 0.96491228 0.886121 0.88253968
|
|
0.94501718 0.92307692 0.92881356 0.97535211]
|
|
|
|
mean value: 0.9352207031286927
|
|
|
|
MCC on Blind test: 0.15
|
|
|
|
Accuracy on Blind test: 0.35
|
|
|
|
Model_name: Stochastic GDescent
|
|
Model func: SGDClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SGDClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02023649 0.02507305 0.02207947 0.02166748 0.02319336 0.02123189
|
|
0.02448797 0.02378654 0.0200305 0.02373862]
|
|
|
|
mean value: 0.02255253791809082
|
|
|
|
key: score_time
|
|
value: [0.0118804 0.01188922 0.01187921 0.01177955 0.01184487 0.01185608
|
|
0.01191831 0.01188183 0.01187134 0.01185822]
|
|
|
|
mean value: 0.011865901947021484
|
|
|
|
key: test_mcc
|
|
value: [0.90748521 0.7284928 0.63960215 0.87831007 0.96824584 0.90369611
|
|
0.83914639 0.90369611 0.50305191 0.83638369]
|
|
|
|
mean value: 0.8108110283179271
|
|
|
|
key: train_mcc
|
|
value: [0.95025527 0.86017051 0.64681322 0.93958474 0.95683453 0.94305636
|
|
0.94305636 0.88357094 0.54525121 0.90616067]
|
|
|
|
mean value: 0.8574753821621066
|
|
|
|
key: test_accuracy
|
|
value: [0.9516129 0.85483871 0.79032258 0.93548387 0.98387097 0.9516129
|
|
0.91935484 0.9516129 0.70491803 0.91803279]
|
|
|
|
mean value: 0.8961660497091486
|
|
|
|
key: train_accuracy
|
|
value: [0.97482014 0.92625899 0.79496403 0.96942446 0.97841727 0.97122302
|
|
0.97122302 0.93884892 0.72890485 0.95152603]
|
|
|
|
mean value: 0.9205610735827855
|
|
|
|
key: test_fscore
|
|
value: [0.95384615 0.83636364 0.82666667 0.93939394 0.98412698 0.95238095
|
|
0.92063492 0.95238095 0.775 0.91525424]
|
|
|
|
mean value: 0.9056048443082341
|
|
|
|
key: train_fscore
|
|
value: [0.97526502 0.92100193 0.82985075 0.97001764 0.97841727 0.97173145
|
|
0.97173145 0.94217687 0.7864215 0.94953271]
|
|
|
|
mean value: 0.929614657143809
|
|
|
|
key: test_precision
|
|
value: [0.91176471 0.95833333 0.70454545 0.88571429 0.96875 0.9375
|
|
0.90625 0.9375 0.63265306 0.93103448]
|
|
|
|
mean value: 0.8774045323458537
|
|
|
|
key: train_precision
|
|
value: [0.95833333 0.99170124 0.70918367 0.95155709 0.97841727 0.95486111
|
|
0.95486111 0.89354839 0.64801865 0.9921875 ]
|
|
|
|
mean value: 0.90326693685663
|
|
|
|
key: test_recall
|
|
value: [1. 0.74193548 1. 1. 1. 0.96774194
|
|
0.93548387 0.96774194 1. 0.9 ]
|
|
|
|
mean value: 0.9512903225806452
|
|
|
|
key: train_recall
|
|
value: [0.99280576 0.85971223 1. 0.98920863 0.97841727 0.98920863
|
|
0.98920863 0.99640288 1. 0.91039427]
|
|
|
|
mean value: 0.9705358294009954
|
|
|
|
key: test_roc_auc
|
|
value: [0.9516129 0.85483871 0.79032258 0.93548387 0.98387097 0.9516129
|
|
0.91935484 0.9516129 0.7 0.91774194]
|
|
|
|
mean value: 0.8956451612903226
|
|
|
|
key: train_roc_auc
|
|
value: [0.97482014 0.92625899 0.79496403 0.96942446 0.97841727 0.97122302
|
|
0.97122302 0.93884892 0.72939068 0.95160001]
|
|
|
|
mean value: 0.9206170547433021
|
|
|
|
key: test_jcc
|
|
value: [0.91176471 0.71875 0.70454545 0.88571429 0.96875 0.90909091
|
|
0.85294118 0.90909091 0.63265306 0.84375 ]
|
|
|
|
mean value: 0.8337050502018989
|
|
|
|
key: train_jcc
|
|
value: [0.95172414 0.85357143 0.70918367 0.94178082 0.95774648 0.94501718
|
|
0.94501718 0.89067524 0.64801865 0.90391459]
|
|
|
|
mean value: 0.8746649384947602
|
|
|
|
MCC on Blind test: 0.16
|
|
|
|
Accuracy on Blind test: 0.36
|
|
|
|
Model_name: AdaBoost Classifier
|
|
Model func: AdaBoostClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', AdaBoostClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.24026704 0.22439814 0.2272625 0.22671032 0.22392869 0.22420788
|
|
0.22610235 0.22615767 0.22761703 0.22552967]
|
|
|
|
mean value: 0.2272181272506714
|
|
|
|
key: score_time
|
|
value: [0.01524711 0.01542163 0.01545 0.01531768 0.01549482 0.01556826
|
|
0.01548982 0.01542044 0.01567101 0.01548672]
|
|
|
|
mean value: 0.015456748008728028
|
|
|
|
key: test_mcc
|
|
value: [0.96824584 0.93548387 0.96824584 0.87278605 0.93548387 0.93548387
|
|
0.96824584 0.93743687 0.93635873 0.8688172 ]
|
|
|
|
mean value: 0.932658797454636
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.98387097 0.96774194 0.98387097 0.93548387 0.96774194 0.96774194
|
|
0.98387097 0.96774194 0.96721311 0.93442623]
|
|
|
|
mean value: 0.9659703860391328
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.98412698 0.96774194 0.98412698 0.9375 0.96774194 0.96774194
|
|
0.98360656 0.96666667 0.96875 0.93333333]
|
|
|
|
mean value: 0.9661336332082631
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.96875 0.96774194 0.96875 0.90909091 0.96774194 0.96774194
|
|
1. 1. 0.93939394 0.93333333]
|
|
|
|
mean value: 0.9622543988269795
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 0.96774194 1. 0.96774194 0.96774194 0.96774194
|
|
0.96774194 0.93548387 1. 0.93333333]
|
|
|
|
mean value: 0.970752688172043
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.98387097 0.96774194 0.98387097 0.93548387 0.96774194 0.96774194
|
|
0.98387097 0.96774194 0.96666667 0.9344086 ]
|
|
|
|
mean value: 0.9659139784946237
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.96875 0.9375 0.96875 0.88235294 0.9375 0.9375
|
|
0.96774194 0.93548387 0.93939394 0.875 ]
|
|
|
|
mean value: 0.9349972687022023
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.05
|
|
|
|
Accuracy on Blind test: 0.31
|
|
|
|
Model_name: Bagging Classifier
|
|
Model func: BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
BaggingClassifier(n_jobs=10, oob_score=True,
|
|
random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.07099128 0.07362032 0.09242415 0.08124018 0.08642912 0.09042525
|
|
0.08972216 0.08200741 0.09961677 0.08565569]
|
|
|
|
mean value: 0.08521323204040528
|
|
|
|
key: score_time
|
|
value: [0.02234936 0.02337837 0.02877522 0.02001715 0.03904366 0.02948976
|
|
0.04676676 0.03126311 0.03618932 0.03704667]
|
|
|
|
mean value: 0.031431937217712404
|
|
|
|
key: test_mcc
|
|
value: [1. 0.90369611 0.96824584 0.87278605 0.90369611 0.93743687
|
|
0.96824584 0.96824584 0.96770777 0.80322581]
|
|
|
|
mean value: 0.9293286231770456
|
|
|
|
key: train_mcc
|
|
value: [0.99640932 0.98926624 0.99283145 0.99283145 0.98921503 0.98921503
|
|
0.99640932 0.99283145 0.98923428 0.99284434]
|
|
|
|
mean value: 0.9921087919345165
|
|
|
|
key: test_accuracy
|
|
value: [1. 0.9516129 0.98387097 0.93548387 0.9516129 0.96774194
|
|
0.98387097 0.98387097 0.98360656 0.90163934]
|
|
|
|
mean value: 0.9643310417768377
|
|
|
|
key: train_accuracy
|
|
value: [0.99820144 0.99460432 0.99640288 0.99640288 0.99460432 0.99460432
|
|
0.99820144 0.99640288 0.994614 0.99640934]
|
|
|
|
mean value: 0.9960447799749429
|
|
|
|
key: test_fscore
|
|
value: [1. 0.95081967 0.98360656 0.9375 0.95081967 0.96666667
|
|
0.98412698 0.98360656 0.98412698 0.9 ]
|
|
|
|
mean value: 0.9641273093937028
|
|
|
|
key: train_fscore
|
|
value: [0.9981982 0.99457505 0.99638989 0.99638989 0.99459459 0.99459459
|
|
0.99820467 0.99638989 0.99459459 0.99640288]
|
|
|
|
mean value: 0.9960334247841588
|
|
|
|
key: test_precision
|
|
value: [1. 0.96666667 1. 0.90909091 0.96666667 1.
|
|
0.96875 1. 0.96875 0.9 ]
|
|
|
|
mean value: 0.9679924242424243
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 0.99638989 0.99638989
|
|
0.99641577 1. 0.99638989 1. ]
|
|
|
|
mean value: 0.9985585445699572
|
|
|
|
key: test_recall
|
|
value: [1. 0.93548387 0.96774194 0.96774194 0.93548387 0.93548387
|
|
1. 0.96774194 1. 0.9 ]
|
|
|
|
mean value: 0.9609677419354838
|
|
|
|
key: train_recall
|
|
value: [0.99640288 0.98920863 0.99280576 0.99280576 0.99280576 0.99280576
|
|
1. 0.99280576 0.99280576 0.99283154]
|
|
|
|
mean value: 0.9935277584384106
|
|
|
|
key: test_roc_auc
|
|
value: [1. 0.9516129 0.98387097 0.93548387 0.9516129 0.96774194
|
|
0.98387097 0.98387097 0.98333333 0.9016129 ]
|
|
|
|
mean value: 0.9643010752688173
|
|
|
|
key: train_roc_auc
|
|
value: [0.99820144 0.99460432 0.99640288 0.99640288 0.99460432 0.99460432
|
|
0.99820144 0.99640288 0.99461076 0.99641577]
|
|
|
|
mean value: 0.9960450994043475
|
|
|
|
key: test_jcc
|
|
value: [1. 0.90625 0.96774194 0.88235294 0.90625 0.93548387
|
|
0.96875 0.96774194 0.96875 0.81818182]
|
|
|
|
mean value: 0.9321502501293772
|
|
|
|
key: train_jcc
|
|
value: [0.99640288 0.98920863 0.99280576 0.99280576 0.98924731 0.98924731
|
|
0.99641577 0.99280576 0.98924731 0.99283154]
|
|
|
|
mean value: 0.9921018024290246
|
|
|
|
MCC on Blind test: 0.16
|
|
|
|
Accuracy on Blind test: 0.72
|
|
|
|
Model_name: Gaussian Process
|
|
Model func: GaussianProcessClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianProcessClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.20767641 0.30310035 0.23238945 0.24465799 0.23261762 0.22742844
|
|
0.25071764 0.28770041 0.26218009 0.24865627]
|
|
|
|
mean value: 0.24971246719360352
|
|
|
|
key: score_time
|
|
value: [0.02684212 0.02675676 0.03252745 0.0270896 0.0277431 0.02707982
|
|
0.02681971 0.03333616 0.02641869 0.02628922]
|
|
|
|
mean value: 0.028090262413024904
|
|
|
|
key: test_mcc
|
|
value: [0.90748521 0.71004695 0.74348441 0.71004695 0.83914639 0.64549722
|
|
0.67741935 0.80813523 0.63978495 0.70780713]
|
|
|
|
mean value: 0.7388853795423522
|
|
|
|
key: train_mcc
|
|
value: [0.97841727 0.96405373 0.97132357 0.96763216 0.96768225 0.97124816
|
|
0.97844259 0.97841727 0.97129927 0.97487139]
|
|
|
|
mean value: 0.9723387641137683
|
|
|
|
key: test_accuracy
|
|
value: [0.9516129 0.85483871 0.87096774 0.85483871 0.91935484 0.82258065
|
|
0.83870968 0.90322581 0.81967213 0.85245902]
|
|
|
|
mean value: 0.8688260179799048
|
|
|
|
key: train_accuracy
|
|
value: [0.98920863 0.98201439 0.98561151 0.98381295 0.98381295 0.98561151
|
|
0.98920863 0.98920863 0.98563734 0.98743268]
|
|
|
|
mean value: 0.9861559226586415
|
|
|
|
key: test_fscore
|
|
value: [0.94915254 0.85245902 0.875 0.85714286 0.91803279 0.81967213
|
|
0.83870968 0.9 0.81967213 0.84210526]
|
|
|
|
mean value: 0.8671946405666758
|
|
|
|
key: train_fscore
|
|
value: [0.98920863 0.98194946 0.98550725 0.98378378 0.98372514 0.98555957
|
|
0.98916968 0.98920863 0.98555957 0.98747764]
|
|
|
|
mean value: 0.9861149337759959
|
|
|
|
key: test_precision
|
|
value: [1. 0.86666667 0.84848485 0.84375 0.93333333 0.83333333
|
|
0.83870968 0.93103448 0.83333333 0.88888889]
|
|
|
|
mean value: 0.881753456421838
|
|
|
|
key: train_precision
|
|
value: [0.98920863 0.98550725 0.99270073 0.98555957 0.98909091 0.98913043
|
|
0.99275362 0.98920863 0.98913043 0.98571429]
|
|
|
|
mean value: 0.9888004496836691
|
|
|
|
key: test_recall
|
|
value: [0.90322581 0.83870968 0.90322581 0.87096774 0.90322581 0.80645161
|
|
0.83870968 0.87096774 0.80645161 0.8 ]
|
|
|
|
mean value: 0.8541935483870968
|
|
|
|
key: train_recall
|
|
value: [0.98920863 0.97841727 0.97841727 0.98201439 0.97841727 0.98201439
|
|
0.98561151 0.98920863 0.98201439 0.98924731]
|
|
|
|
mean value: 0.9834571052835152
|
|
|
|
key: test_roc_auc
|
|
value: [0.9516129 0.85483871 0.87096774 0.85483871 0.91935484 0.82258065
|
|
0.83870968 0.90322581 0.81989247 0.8516129 ]
|
|
|
|
mean value: 0.8687634408602151
|
|
|
|
key: train_roc_auc
|
|
value: [0.98920863 0.98201439 0.98561151 0.98381295 0.98381295 0.98561151
|
|
0.98920863 0.98920863 0.98563085 0.98742941]
|
|
|
|
mean value: 0.9861549470101338
|
|
|
|
key: test_jcc
|
|
value: [0.90322581 0.74285714 0.77777778 0.75 0.84848485 0.69444444
|
|
0.72222222 0.81818182 0.69444444 0.72727273]
|
|
|
|
mean value: 0.7678911232137039
|
|
|
|
key: train_jcc
|
|
value: [0.97864769 0.96453901 0.97142857 0.96808511 0.96797153 0.97153025
|
|
0.97857143 0.97864769 0.97153025 0.97526502]
|
|
|
|
mean value: 0.9726216533278254
|
|
|
|
MCC on Blind test: 0.17
|
|
|
|
Accuracy on Blind test: 0.51
|
|
|
|
Model_name: Gradient Boosting
|
|
Model func: GradientBoostingClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GradientBoostingClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.9341197 0.93513346 0.9367969 0.91957927 0.92712259 0.92074847
|
|
0.92252684 0.93268156 0.93002224 0.92744589]
|
|
|
|
mean value: 0.9286176919937134
|
|
|
|
key: score_time
|
|
value: [0.00948143 0.00930476 0.00921249 0.00940442 0.00933552 0.00925016
|
|
0.00932121 0.00927615 0.00973344 0.00919533]
|
|
|
|
mean value: 0.009351491928100586
|
|
|
|
key: test_mcc
|
|
value: [1. 0.93548387 1. 0.87278605 0.96824584 0.93743687
|
|
0.96824584 0.96824584 1. 0.8688172 ]
|
|
|
|
mean value: 0.9519261499663041
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [1. 0.96774194 1. 0.93548387 0.98387097 0.96774194
|
|
0.98387097 0.98387097 1. 0.93442623]
|
|
|
|
mean value: 0.9757006874669487
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [1. 0.96774194 1. 0.9375 0.98360656 0.96666667
|
|
0.98412698 0.98360656 1. 0.93333333]
|
|
|
|
mean value: 0.9756582034364953
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 0.96774194 1. 0.90909091 1. 1.
|
|
0.96875 1. 1. 0.93333333]
|
|
|
|
mean value: 0.9778916177908114
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 0.96774194 1. 0.96774194 0.96774194 0.93548387
|
|
1. 0.96774194 1. 0.93333333]
|
|
|
|
mean value: 0.9739784946236559
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [1. 0.96774194 1. 0.93548387 0.98387097 0.96774194
|
|
0.98387097 0.98387097 1. 0.9344086 ]
|
|
|
|
mean value: 0.9756989247311828
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [1. 0.9375 1. 0.88235294 0.96774194 0.93548387
|
|
0.96875 0.96774194 1. 0.875 ]
|
|
|
|
mean value: 0.9534570683111955
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.16
|
|
|
|
Accuracy on Blind test: 0.74
|
|
|
|
Model_name: QDA
|
|
Model func: QuadraticDiscriminantAnalysis()
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', QuadraticDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.03188586 0.03635883 0.03134942 0.03185868 0.03210068 0.03209543
|
|
0.03178 0.03248978 0.0321188 0.0346837 ]
|
|
|
|
mean value: 0.032672119140625
|
|
|
|
key: score_time
|
|
value: [0.01282048 0.01319289 0.0133009 0.01523924 0.01509047 0.01763248
|
|
0.01769018 0.01737404 0.01742172 0.01940989]
|
|
|
|
mean value: 0.015917229652404784
|
|
|
|
key: test_mcc
|
|
value: [0.55301004 0.52981294 0.61290323 0.74193548 0.7130241 0.64820372
|
|
0.61807005 0.64820372 0.61090565 0.64708149]
|
|
|
|
mean value: 0.6323150429115658
|
|
|
|
key: train_mcc
|
|
value: [0.88433663 0.79939871 0.96813337 0.94707924 0.88509826 0.93340152
|
|
0.98561151 0.96813337 0.97855633 0.95780462]
|
|
|
|
mean value: 0.9307553567934764
|
|
|
|
key: test_accuracy
|
|
value: [0.77419355 0.75806452 0.80645161 0.87096774 0.85483871 0.82258065
|
|
0.80645161 0.82258065 0.80327869 0.81967213]
|
|
|
|
mean value: 0.8139079851930195
|
|
|
|
key: train_accuracy
|
|
value: [0.93884892 0.89208633 0.98381295 0.97302158 0.94244604 0.96582734
|
|
0.99280576 0.98381295 0.98922801 0.97845601]
|
|
|
|
mean value: 0.9640345892047583
|
|
|
|
key: test_fscore
|
|
value: [0.75862069 0.7826087 0.80645161 0.87096774 0.86153846 0.83076923
|
|
0.81818182 0.81355932 0.81818182 0.8 ]
|
|
|
|
mean value: 0.8160879390851283
|
|
|
|
key: train_fscore
|
|
value: [0.9348659 0.90163934 0.98354662 0.97237569 0.9430605 0.96684119
|
|
0.99280576 0.98354662 0.98913043 0.97802198]
|
|
|
|
mean value: 0.9645834024242367
|
|
|
|
key: test_precision
|
|
value: [0.81481481 0.71052632 0.80645161 0.87096774 0.82352941 0.79411765
|
|
0.77142857 0.85714286 0.77142857 0.88 ]
|
|
|
|
mean value: 0.8100407544266528
|
|
|
|
key: train_precision
|
|
value: [1. 0.82831325 1. 0.99622642 0.93309859 0.93898305
|
|
0.99280576 1. 0.99635036 1. ]
|
|
|
|
mean value: 0.9685777430862328
|
|
|
|
key: test_recall
|
|
value: [0.70967742 0.87096774 0.80645161 0.87096774 0.90322581 0.87096774
|
|
0.87096774 0.77419355 0.87096774 0.73333333]
|
|
|
|
mean value: 0.8281720430107526
|
|
|
|
key: train_recall
|
|
value: [0.87769784 0.98920863 0.9676259 0.94964029 0.95323741 0.99640288
|
|
0.99280576 0.9676259 0.98201439 0.95698925]
|
|
|
|
mean value: 0.9633248240117583
|
|
|
|
key: test_roc_auc
|
|
value: [0.77419355 0.75806452 0.80645161 0.87096774 0.85483871 0.82258065
|
|
0.80645161 0.82258065 0.80215054 0.81827957]
|
|
|
|
mean value: 0.8136559139784947
|
|
|
|
key: train_roc_auc
|
|
value: [0.93884892 0.89208633 0.98381295 0.97302158 0.94244604 0.96582734
|
|
0.99280576 0.98381295 0.98921508 0.97849462]
|
|
|
|
mean value: 0.9640371573708775
|
|
|
|
key: test_jcc
|
|
value: [0.61111111 0.64285714 0.67567568 0.77142857 0.75675676 0.71052632
|
|
0.69230769 0.68571429 0.69230769 0.66666667]
|
|
|
|
mean value: 0.6905351910615068
|
|
|
|
key: train_jcc
|
|
value: [0.87769784 0.82089552 0.9676259 0.94623656 0.89225589 0.93581081
|
|
0.98571429 0.9676259 0.97849462 0.95698925]
|
|
|
|
mean value: 0.9329346581564345
|
|
|
|
MCC on Blind test: 0.04
|
|
|
|
Accuracy on Blind test: 0.43
|
|
|
|
Model_name: Ridge Classifier
|
|
Model func: RidgeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.03260899 0.03938699 0.03979325 0.03957844 0.03954554 0.04079795
|
|
0.03987575 0.03974915 0.0404129 0.0436821 ]
|
|
|
|
mean value: 0.03954310417175293
|
|
|
|
key: score_time
|
|
value: [0.0209713 0.01867533 0.01859689 0.01982999 0.01872468 0.01870179
|
|
0.01865554 0.01876974 0.01882648 0.02204275]
|
|
|
|
mean value: 0.019379448890686036
|
|
|
|
key: test_mcc
|
|
value: [0.87278605 0.90369611 0.77459667 0.84983659 0.93743687 0.93548387
|
|
0.93548387 0.93743687 0.83655914 0.8688172 ]
|
|
|
|
mean value: 0.8852133236276603
|
|
|
|
key: train_mcc
|
|
value: [0.95705746 0.95693359 0.96048758 0.96405373 0.94619622 0.95339163
|
|
0.93900081 0.95353974 0.95347639 0.96065614]
|
|
|
|
mean value: 0.9544793279173203
|
|
|
|
key: test_accuracy
|
|
value: [0.93548387 0.9516129 0.88709677 0.91935484 0.96774194 0.96774194
|
|
0.96774194 0.96774194 0.91803279 0.93442623]
|
|
|
|
mean value: 0.9416975145425701
|
|
|
|
key: train_accuracy
|
|
value: [0.97841727 0.97841727 0.98021583 0.98201439 0.97302158 0.97661871
|
|
0.96942446 0.97661871 0.97666068 0.98025135]
|
|
|
|
mean value: 0.9771660230164163
|
|
|
|
key: test_fscore
|
|
value: [0.9375 0.95238095 0.88888889 0.92537313 0.96875 0.96774194
|
|
0.96774194 0.96666667 0.91803279 0.93333333]
|
|
|
|
mean value: 0.9426409633451187
|
|
|
|
key: train_fscore
|
|
value: [0.97864769 0.97857143 0.980322 0.98207885 0.97326203 0.97682709
|
|
0.96969697 0.97690941 0.97682709 0.98046181]
|
|
|
|
mean value: 0.9773604388336684
|
|
|
|
key: test_precision
|
|
value: [0.90909091 0.9375 0.875 0.86111111 0.93939394 0.96774194
|
|
0.96774194 1. 0.93333333 0.93333333]
|
|
|
|
mean value: 0.9324246497230368
|
|
|
|
key: train_precision
|
|
value: [0.96830986 0.97163121 0.97508897 0.97857143 0.96466431 0.96819788
|
|
0.96113074 0.96491228 0.96819788 0.97183099]
|
|
|
|
mean value: 0.9692535540709742
|
|
|
|
key: test_recall
|
|
value: [0.96774194 0.96774194 0.90322581 1. 1. 0.96774194
|
|
0.96774194 0.93548387 0.90322581 0.93333333]
|
|
|
|
mean value: 0.9546236559139785
|
|
|
|
key: train_recall
|
|
value: [0.98920863 0.98561151 0.98561151 0.98561151 0.98201439 0.98561151
|
|
0.97841727 0.98920863 0.98561151 0.98924731]
|
|
|
|
mean value: 0.9856153786648101
|
|
|
|
key: test_roc_auc
|
|
value: [0.93548387 0.9516129 0.88709677 0.91935484 0.96774194 0.96774194
|
|
0.96774194 0.96774194 0.91827957 0.9344086 ]
|
|
|
|
mean value: 0.9417204301075269
|
|
|
|
key: train_roc_auc
|
|
value: [0.97841727 0.97841727 0.98021583 0.98201439 0.97302158 0.97661871
|
|
0.96942446 0.97661871 0.97667672 0.98023517]
|
|
|
|
mean value: 0.9771660091281814
|
|
|
|
key: test_jcc
|
|
value: [0.88235294 0.90909091 0.8 0.86111111 0.93939394 0.9375
|
|
0.9375 0.93548387 0.84848485 0.875 ]
|
|
|
|
mean value: 0.8925917620225021
|
|
|
|
key: train_jcc
|
|
value: [0.95818815 0.95804196 0.96140351 0.96478873 0.94791667 0.95470383
|
|
0.94117647 0.95486111 0.95470383 0.96167247]
|
|
|
|
mean value: 0.9557456740257194
|
|
|
|
MCC on Blind test: 0.21
|
|
|
|
Accuracy on Blind test: 0.48
|
|
|
|
Model_name: Ridge ClassifierCV
|
|
Model func: RidgeClassifierCV(cv=10)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifierCV(cv=10))])
|
|
|
|
key: fit_time
|
|
value: [0.25235701 0.16581655 0.29582691 0.30781555 0.30480957 0.29897332
|
|
0.36340833 0.34521413 0.30596662 0.31356263]
|
|
|
|
mean value: 0.29537506103515626
|
|
|
|
key: score_time
|
|
value: [0.0121603 0.01885104 0.01891851 0.01890349 0.01882386 0.01906419
|
|
0.02405453 0.01878023 0.01878715 0.01880646]
|
|
|
|
mean value: 0.01871497631072998
|
|
|
|
key: test_mcc
|
|
value: [0.87278605 0.87096774 0.93743687 0.84266484 0.93743687 0.93548387
|
|
0.93548387 0.90748521 0.9344086 0.83638369]
|
|
|
|
mean value: 0.9010537613218687
|
|
|
|
key: train_mcc
|
|
value: [0.95705746 0.96768225 0.96768225 0.97124816 0.96058703 0.95339163
|
|
0.93900081 0.96778244 0.96784094 0.97855633]
|
|
|
|
mean value: 0.9630829293630891
|
|
|
|
key: test_accuracy
|
|
value: [0.93548387 0.93548387 0.96774194 0.91935484 0.96774194 0.96774194
|
|
0.96774194 0.9516129 0.96721311 0.91803279]
|
|
|
|
mean value: 0.9498149127445796
|
|
|
|
key: train_accuracy
|
|
value: [0.97841727 0.98381295 0.98381295 0.98561151 0.98021583 0.97661871
|
|
0.96942446 0.98381295 0.98384201 0.98922801]
|
|
|
|
mean value: 0.9814796636658357
|
|
|
|
key: test_fscore
|
|
value: [0.9375 0.93548387 0.96875 0.92307692 0.96875 0.96774194
|
|
0.96774194 0.94915254 0.96774194 0.91525424]
|
|
|
|
mean value: 0.9501193380157295
|
|
|
|
key: train_fscore
|
|
value: /home/tanu/git/LSHTM_analysis/scripts/ml/./katg_orig.py:135: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
smnc_CT.sort_values(by = ['test_mcc'], ascending = False, inplace = True)
|
|
/home/tanu/git/LSHTM_analysis/scripts/ml/./katg_orig.py:138: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
smnc_BT.sort_values(by = ['bts_mcc'], ascending = False, inplace = True)
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[0.97864769 0.98389982 0.98389982 0.98566308 0.98039216 0.97682709
|
|
0.96969697 0.98395722 0.98395722 0.98932384]
|
|
|
|
mean value: 0.9816264914441175
|
|
|
|
key: test_precision
|
|
value: [0.90909091 0.93548387 0.93939394 0.88235294 0.93939394 0.96774194
|
|
0.96774194 1. 0.96774194 0.93103448]
|
|
|
|
mean value: 0.9439975889233234
|
|
|
|
key: train_precision
|
|
value: [0.96830986 0.97864769 0.97864769 0.98214286 0.97173145 0.96819788
|
|
0.96113074 0.97526502 0.97526502 0.98233216]
|
|
|
|
mean value: 0.9741670351447366
|
|
|
|
key: test_recall
|
|
value: [0.96774194 0.93548387 1. 0.96774194 1. 0.96774194
|
|
0.96774194 0.90322581 0.96774194 0.9 ]
|
|
|
|
mean value: 0.957741935483871
|
|
|
|
key: train_recall
|
|
value: [0.98920863 0.98920863 0.98920863 0.98920863 0.98920863 0.98561151
|
|
0.97841727 0.99280576 0.99280576 0.99641577]
|
|
|
|
mean value: 0.9892099223846729
|
|
|
|
key: test_roc_auc
|
|
value: [0.93548387 0.93548387 0.96774194 0.91935484 0.96774194 0.96774194
|
|
0.96774194 0.9516129 0.9672043 0.91774194]
|
|
|
|
mean value: 0.9497849462365592
|
|
|
|
key: train_roc_auc
|
|
value: [0.97841727 0.98381295 0.98381295 0.98561151 0.98021583 0.97661871
|
|
0.96942446 0.98381295 0.98385807 0.98921508]
|
|
|
|
mean value: 0.9814799773084758
|
|
|
|
key: test_jcc
|
|
value: [0.88235294 0.87878788 0.93939394 0.85714286 0.93939394 0.9375
|
|
0.9375 0.90322581 0.9375 0.84375 ]
|
|
|
|
mean value: 0.9056547362346699
|
|
|
|
key: train_jcc
|
|
value: [0.95818815 0.96830986 0.96830986 0.97173145 0.96153846 0.95470383
|
|
0.94117647 0.96842105 0.96842105 0.97887324]
|
|
|
|
mean value: 0.9639673429962302
|
|
|
|
MCC on Blind test: 0.19
|
|
|
|
Accuracy on Blind test: 0.47
|
|
|
|
Model_name: Logistic Regression
|
|
Model func: LogisticRegression(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegression(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.03648996 0.06613564 0.06211376 0.06729698 0.04555464 0.04708934
|
|
0.04693675 0.05631852 0.06484437 0.04323673]
|
|
|
|
mean value: 0.05360167026519776
|
|
|
|
key: score_time
|
|
value: [0.01524282 0.01525187 0.0154314 0.02462435 0.01772332 0.01929474
|
|
0.0188992 0.01929641 0.01779008 0.015306 ]
|
|
|
|
mean value: 0.017886018753051756
|
|
|
|
key: test_mcc
|
|
value: [0.93548387 0.64549722 0.83914639 0.79471941 0.83914639 0.93548387
|
|
0.67883359 0.80813523 0.67204301 0.80516731]
|
|
|
|
mean value: 0.7953656310014937
|
|
|
|
key: train_mcc
|
|
value: [0.89986294 0.86386843 0.84690871 0.85666952 0.83184526 0.84927258
|
|
0.86038603 0.86065376 0.85403593 0.85691637]
|
|
|
|
mean value: 0.858041952871322
|
|
|
|
key: test_accuracy
|
|
value: [0.96774194 0.82258065 0.91935484 0.88709677 0.91935484 0.96774194
|
|
0.83870968 0.90322581 0.83606557 0.90163934]
|
|
|
|
mean value: 0.896351136964569
|
|
|
|
key: train_accuracy
|
|
value: [0.94964029 0.93165468 0.92266187 0.92805755 0.91546763 0.92446043
|
|
0.92985612 0.92985612 0.92639138 0.92818671]
|
|
|
|
mean value: 0.9286232773206928
|
|
|
|
key: test_fscore
|
|
value: [0.96774194 0.82539683 0.92063492 0.89855072 0.92063492 0.96774194
|
|
0.84375 0.9 0.83870968 0.90322581]
|
|
|
|
mean value: 0.8986386746143057
|
|
|
|
key: train_fscore
|
|
value: [0.95053004 0.93286219 0.92495637 0.92932862 0.91739895 0.92553191
|
|
0.93121693 0.9314587 0.92819615 0.92957746]
|
|
|
|
mean value: 0.9301057321039911
|
|
|
|
key: test_precision
|
|
value: [0.96774194 0.8125 0.90625 0.81578947 0.90625 0.96774194
|
|
0.81818182 0.93103448 0.83870968 0.875 ]
|
|
|
|
mean value: 0.8839199323011746
|
|
|
|
key: train_precision
|
|
value: [0.93402778 0.91666667 0.89830508 0.91319444 0.89690722 0.91258741
|
|
0.91349481 0.91065292 0.90443686 0.91349481]
|
|
|
|
mean value: 0.911376800312453
|
|
|
|
key: test_recall
|
|
value: [0.96774194 0.83870968 0.93548387 1. 0.93548387 0.96774194
|
|
0.87096774 0.87096774 0.83870968 0.93333333]
|
|
|
|
mean value: 0.9159139784946236
|
|
|
|
key: train_recall
|
|
value: [0.9676259 0.94964029 0.95323741 0.94604317 0.93884892 0.93884892
|
|
0.94964029 0.95323741 0.95323741 0.94623656]
|
|
|
|
mean value: 0.949659627137
|
|
|
|
key: test_roc_auc
|
|
value: [0.96774194 0.82258065 0.91935484 0.88709677 0.91935484 0.96774194
|
|
0.83870968 0.90322581 0.83602151 0.90215054]
|
|
|
|
mean value: 0.8963978494623657
|
|
|
|
key: train_roc_auc
|
|
value: [0.94964029 0.93165468 0.92266187 0.92805755 0.91546763 0.92446043
|
|
0.92985612 0.92985612 0.92643949 0.92815425]
|
|
|
|
mean value: 0.9286248420618344
|
|
|
|
key: test_jcc
|
|
value: [0.9375 0.7027027 0.85294118 0.81578947 0.85294118 0.9375
|
|
0.72972973 0.81818182 0.72222222 0.82352941]
|
|
|
|
mean value: 0.8193037711226565
|
|
|
|
key: train_jcc
|
|
value: [0.90572391 0.87417219 0.86038961 0.8679868 0.8474026 0.86138614
|
|
0.87128713 0.87171053 0.86601307 0.86842105]
|
|
|
|
mean value: 0.8694493015795971
|
|
|
|
MCC on Blind test: 0.23
|
|
|
|
Accuracy on Blind test: 0.53
|
|
|
|
Model_name: Logistic RegressionCV
|
|
Model func: LogisticRegressionCV(random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegressionCV(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.92916417 0.92191863 1.12244606 0.9350934 1.06614399 0.9190464
|
|
1.05714083 0.94225955 1.10928631 0.9236002 ]
|
|
|
|
mean value: 0.99260995388031
|
|
|
|
key: score_time
|
|
value: [0.01470137 0.0223763 0.01537371 0.01549363 0.01523066 0.01540756
|
|
0.01223493 0.01533461 0.01543403 0.01531196]
|
|
|
|
mean value: 0.015689873695373537
|
|
|
|
key: test_mcc
|
|
value: [0.93548387 0.90369611 0.96824584 0.90369611 0.90748521 0.96824584
|
|
0.90369611 0.90748521 0.80322581 0.90586325]
|
|
|
|
mean value: 0.9107123373005651
|
|
|
|
key: train_mcc
|
|
value: [0.97844259 0.97844259 1. 1. 1. 0.97482645
|
|
0.99640932 0.97482645 0.98205307 1. ]
|
|
|
|
mean value: 0.9885000467162429
|
|
|
|
key: test_accuracy
|
|
value: [0.96774194 0.9516129 0.98387097 0.9516129 0.9516129 0.98387097
|
|
0.9516129 0.9516129 0.90163934 0.95081967]
|
|
|
|
mean value: 0.9546007403490218
|
|
|
|
key: train_accuracy
|
|
value: [0.98920863 0.98920863 1. 1. 1. 0.98741007
|
|
0.99820144 0.98741007 0.99102334 1. ]
|
|
|
|
mean value: 0.9942462188238637
|
|
|
|
key: test_fscore
|
|
value: [0.96774194 0.95238095 0.98360656 0.95238095 0.94915254 0.98360656
|
|
0.95081967 0.94915254 0.90322581 0.94736842]
|
|
|
|
mean value: 0.9539435939381029
|
|
|
|
key: train_fscore
|
|
value: [0.98924731 0.98924731 1. 1. 1. 0.98743268
|
|
0.9981982 0.98743268 0.99102334 1. ]
|
|
|
|
mean value: 0.9942581511261652
|
|
|
|
key: test_precision
|
|
value: [0.96774194 0.9375 1. 0.9375 1. 1.
|
|
0.96666667 1. 0.90322581 1. ]
|
|
|
|
mean value: 0.9712634408602151
|
|
|
|
key: train_precision
|
|
value: [0.98571429 0.98571429 1. 1. 1. 0.98566308
|
|
1. 0.98566308 0.98924731 1. ]
|
|
|
|
mean value: 0.993200204813108
|
|
|
|
key: test_recall
|
|
value: [0.96774194 0.96774194 0.96774194 0.96774194 0.90322581 0.96774194
|
|
0.93548387 0.90322581 0.90322581 0.9 ]
|
|
|
|
mean value: 0.9383870967741935
|
|
|
|
key: train_recall
|
|
value: [0.99280576 0.99280576 1. 1. 1. 0.98920863
|
|
0.99640288 0.98920863 0.99280576 1. ]
|
|
|
|
mean value: 0.9953237410071942
|
|
|
|
key: test_roc_auc
|
|
value: [0.96774194 0.9516129 0.98387097 0.9516129 0.9516129 0.98387097
|
|
0.9516129 0.9516129 0.9016129 0.95 ]
|
|
|
|
mean value: 0.9545161290322581
|
|
|
|
key: train_roc_auc
|
|
value: [0.98920863 0.98920863 1. 1. 1. 0.98741007
|
|
0.99820144 0.98741007 0.99102653 1. ]
|
|
|
|
mean value: 0.9942465382532684
|
|
|
|
key: test_jcc
|
|
value: [0.9375 0.90909091 0.96774194 0.90909091 0.90322581 0.96774194
|
|
0.90625 0.90322581 0.82352941 0.9 ]
|
|
|
|
mean value: 0.9127396713817492
|
|
|
|
key: train_jcc
|
|
value: [0.9787234 0.9787234 1. 1. 1. 0.9751773
|
|
0.99640288 0.9751773 0.98220641 1. ]
|
|
|
|
mean value: 0.9886410701831508
|
|
|
|
MCC on Blind test: 0.12
|
|
|
|
Accuracy on Blind test: 0.39
|
|
|
|
Model_name: Gaussian NB
|
|
Model func: GaussianNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01557422 0.01259375 0.01137304 0.01015782 0.01030827 0.01022148
|
|
0.010221 0.01175594 0.01026297 0.01016307]
|
|
|
|
mean value: 0.011263155937194824
|
|
|
|
key: score_time
|
|
value: [0.01154852 0.01055121 0.00985241 0.00890565 0.00945139 0.00883079
|
|
0.00884581 0.00965166 0.0088501 0.00897288]
|
|
|
|
mean value: 0.009546041488647461
|
|
|
|
key: test_mcc
|
|
value: [0.48488114 0.22580645 0.71004695 0.62471615 0.71004695 0.84266484
|
|
0.42289003 0.5809475 0.50807349 0.60645161]
|
|
|
|
mean value: 0.5716525113259076
|
|
|
|
key: train_mcc
|
|
value: [0.53865672 0.62262853 0.58994332 0.60090995 0.59713776 0.61151079
|
|
0.61520743 0.58992806 0.60144143 0.60146217]
|
|
|
|
mean value: 0.596882614684279
|
|
|
|
key: test_accuracy
|
|
value: [0.74193548 0.61290323 0.85483871 0.80645161 0.85483871 0.91935484
|
|
0.70967742 0.79032258 0.75409836 0.80327869]
|
|
|
|
mean value: 0.7847699629825489
|
|
|
|
key: train_accuracy
|
|
value: [0.76438849 0.81115108 0.79496403 0.80035971 0.79856115 0.8057554
|
|
0.80755396 0.79496403 0.80071813 0.80071813]
|
|
|
|
mean value: 0.7979134107435775
|
|
|
|
key: test_fscore
|
|
value: [0.75 0.61290323 0.85714286 0.82352941 0.85245902 0.92307692
|
|
0.72727273 0.78688525 0.76190476 0.8 ]
|
|
|
|
mean value: 0.7895174169263509
|
|
|
|
key: train_fscore
|
|
value: [0.78489327 0.81415929 0.79569892 0.80284192 0.79928315 0.8057554
|
|
0.80580762 0.79496403 0.80071813 0.80213904]
|
|
|
|
mean value: 0.8006260774087884
|
|
|
|
key: test_precision
|
|
value: [0.72727273 0.61290323 0.84375 0.75675676 0.86666667 0.88235294
|
|
0.68571429 0.8 0.75 0.8 ]
|
|
|
|
mean value: 0.7725416603393359
|
|
|
|
key: train_precision
|
|
value: [0.72205438 0.80139373 0.79285714 0.79298246 0.79642857 0.8057554
|
|
0.81318681 0.79496403 0.79928315 0.79787234]
|
|
|
|
mean value: 0.7916778011508354
|
|
|
|
key: test_recall
|
|
value: [0.77419355 0.61290323 0.87096774 0.90322581 0.83870968 0.96774194
|
|
0.77419355 0.77419355 0.77419355 0.8 ]
|
|
|
|
mean value: 0.8090322580645162
|
|
|
|
key: train_recall
|
|
value: [0.85971223 0.82733813 0.79856115 0.81294964 0.80215827 0.8057554
|
|
0.79856115 0.79496403 0.80215827 0.80645161]
|
|
|
|
mean value: 0.8108609886284521
|
|
|
|
key: test_roc_auc
|
|
value: [0.74193548 0.61290323 0.85483871 0.80645161 0.85483871 0.91935484
|
|
0.70967742 0.79032258 0.75376344 0.80322581]
|
|
|
|
mean value: 0.784731182795699
|
|
|
|
key: train_roc_auc
|
|
value: [0.76438849 0.81115108 0.79496403 0.80035971 0.79856115 0.8057554
|
|
0.80755396 0.79496403 0.80072071 0.80070782]
|
|
|
|
mean value: 0.79791263763183
|
|
|
|
key: test_jcc
|
|
value: [0.6 0.44186047 0.75 0.7 0.74285714 0.85714286
|
|
0.57142857 0.64864865 0.61538462 0.66666667]
|
|
|
|
mean value: 0.6593988967244782
|
|
|
|
key: train_jcc
|
|
value: [0.64594595 0.68656716 0.66071429 0.67062315 0.66567164 0.6746988
|
|
0.67477204 0.65970149 0.66766467 0.66964286]
|
|
|
|
mean value: 0.6676002035024714
|
|
|
|
MCC on Blind test: 0.19
|
|
|
|
Accuracy on Blind test: 0.52
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.0104816 0.01038766 0.01042938 0.01051688 0.01049376 0.01051188
|
|
0.01045275 0.01059914 0.01046395 0.01043129]
|
|
|
|
mean value: 0.010476827621459961
|
|
|
|
key: score_time
|
|
value: [0.00892305 0.00895 0.00893521 0.0091629 0.00889969 0.00888014
|
|
0.008919 0.00981951 0.00892782 0.00894523]
|
|
|
|
mean value: 0.0090362548828125
|
|
|
|
key: test_mcc
|
|
value: [0.64820372 0.55301004 0.5809475 0.49319696 0.74348441 0.67883359
|
|
0.58338335 0.58338335 0.57419355 0.73763441]
|
|
|
|
mean value: 0.6176270892557514
|
|
|
|
key: train_mcc
|
|
value: [0.6587893 0.67364319 0.6549475 0.6870548 0.6618705 0.68389584
|
|
0.67364319 0.68084793 0.67053524 0.65891743]
|
|
|
|
mean value: 0.6704144922517364
|
|
|
|
key: test_accuracy
|
|
value: [0.82258065 0.77419355 0.79032258 0.74193548 0.87096774 0.83870968
|
|
0.79032258 0.79032258 0.78688525 0.86885246]
|
|
|
|
mean value: 0.807509254362771
|
|
|
|
key: train_accuracy
|
|
value: [0.82913669 0.83633094 0.82733813 0.84352518 0.83093525 0.84172662
|
|
0.83633094 0.83992806 0.83482944 0.82944345]
|
|
|
|
mean value: 0.8349524689045891
|
|
|
|
key: test_fscore
|
|
value: [0.81355932 0.78787879 0.79365079 0.76470588 0.86666667 0.83333333
|
|
0.8 0.8 0.78688525 0.86666667]
|
|
|
|
mean value: 0.8113346698484727
|
|
|
|
key: train_fscore
|
|
value: [0.8324515 0.84063047 0.82978723 0.8438061 0.83093525 0.84452297
|
|
0.84063047 0.8441331 0.83859649 0.83065954]
|
|
|
|
mean value: 0.8376153130590535
|
|
|
|
key: test_precision
|
|
value: [0.85714286 0.74285714 0.78125 0.7027027 0.89655172 0.86206897
|
|
0.76470588 0.76470588 0.8 0.86666667]
|
|
|
|
mean value: 0.8038651823730424
|
|
|
|
key: train_precision
|
|
value: [0.816609 0.81911263 0.81818182 0.84229391 0.83093525 0.82986111
|
|
0.81911263 0.8225256 0.81849315 0.82624113]
|
|
|
|
mean value: 0.8243366223120344
|
|
|
|
key: test_recall
|
|
value: [0.77419355 0.83870968 0.80645161 0.83870968 0.83870968 0.80645161
|
|
0.83870968 0.83870968 0.77419355 0.86666667]
|
|
|
|
mean value: 0.8221505376344086
|
|
|
|
key: train_recall
|
|
value: [0.84892086 0.86330935 0.84172662 0.84532374 0.83093525 0.85971223
|
|
0.86330935 0.86690647 0.85971223 0.83512545]
|
|
|
|
mean value: 0.8514981563136588
|
|
|
|
key: test_roc_auc
|
|
value: [0.82258065 0.77419355 0.79032258 0.74193548 0.87096774 0.83870968
|
|
0.79032258 0.79032258 0.78709677 0.8688172 ]
|
|
|
|
mean value: 0.8075268817204301
|
|
|
|
key: train_roc_auc
|
|
value: [0.82913669 0.83633094 0.82733813 0.84352518 0.83093525 0.84172662
|
|
0.83633094 0.83992806 0.83487404 0.82943323]
|
|
|
|
mean value: 0.8349559062427477
|
|
|
|
key: test_jcc
|
|
value: [0.68571429 0.65 0.65789474 0.61904762 0.76470588 0.71428571
|
|
0.66666667 0.66666667 0.64864865 0.76470588]
|
|
|
|
mean value: 0.6838336102577589
|
|
|
|
key: train_jcc
|
|
value: [0.71299094 0.72507553 0.70909091 0.72981366 0.71076923 0.73088685
|
|
0.72507553 0.73030303 0.72205438 0.71036585]
|
|
|
|
mean value: 0.7206425913193242
|
|
|
|
MCC on Blind test: 0.19
|
|
|
|
Accuracy on Blind test: 0.52
|
|
|
|
Model_name: K-Nearest Neighbors
|
|
Model func: KNeighborsClassifier()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', KNeighborsClassifier())])
|
|
|
|
key: fit_time
|
|
value: [0.01108789 0.0107801 0.01099753 0.01079965 0.0109024 0.00961852
|
|
0.00987792 0.01094413 0.01082826 0.01088762]
|
|
|
|
mean value: 0.010672402381896973
|
|
|
|
key: score_time
|
|
value: [0.0130856 0.01319814 0.01321411 0.01798558 0.01740742 0.01253605
|
|
0.01425529 0.01265335 0.01299381 0.01311779]
|
|
|
|
mean value: 0.014044713973999024
|
|
|
|
key: test_mcc
|
|
value: [0.43405737 0.42023032 0.51856298 0.45184806 0.61418277 0.58834841
|
|
0.39223227 0.52981294 0.44301075 0.44241145]
|
|
|
|
mean value: 0.48346973185248315
|
|
|
|
key: train_mcc
|
|
value: [0.70569372 0.71873948 0.65294473 0.69525741 0.67838657 0.69587174
|
|
0.71239616 0.72061896 0.71764405 0.70742859]
|
|
|
|
mean value: 0.7004981411109261
|
|
|
|
key: test_accuracy
|
|
value: [0.70967742 0.70967742 0.75806452 0.72580645 0.80645161 0.79032258
|
|
0.69354839 0.75806452 0.72131148 0.72131148]
|
|
|
|
mean value: 0.7394235854045479
|
|
|
|
key: train_accuracy
|
|
value: [0.85251799 0.85791367 0.82553957 0.8471223 0.8381295 0.8471223
|
|
0.85611511 0.85971223 0.85816876 0.85278276]
|
|
|
|
mean value: 0.8495124187902819
|
|
|
|
key: test_fscore
|
|
value: [0.66666667 0.71875 0.74576271 0.72131148 0.8 0.77192982
|
|
0.71641791 0.72727273 0.72131148 0.71186441]
|
|
|
|
mean value: 0.7301287198412298
|
|
|
|
key: train_fscore
|
|
value: [0.84926471 0.85122411 0.81869159 0.84288355 0.83146067 0.84171322
|
|
0.85454545 0.85555556 0.85343228 0.84758364]
|
|
|
|
mean value: 0.8446354780098349
|
|
|
|
key: test_precision
|
|
value: [0.7826087 0.6969697 0.78571429 0.73333333 0.82758621 0.84615385
|
|
0.66666667 0.83333333 0.73333333 0.72413793]
|
|
|
|
mean value: 0.7629837329087704
|
|
|
|
key: train_precision
|
|
value: [0.86842105 0.89328063 0.85214008 0.86692015 0.8671875 0.87258687
|
|
0.86397059 0.88167939 0.88122605 0.88030888]
|
|
|
|
mean value: 0.8727721199038784
|
|
|
|
key: test_recall
|
|
value: [0.58064516 0.74193548 0.70967742 0.70967742 0.77419355 0.70967742
|
|
0.77419355 0.64516129 0.70967742 0.7 ]
|
|
|
|
mean value: 0.7054838709677419
|
|
|
|
key: train_recall
|
|
value: [0.83093525 0.81294964 0.78776978 0.82014388 0.79856115 0.81294964
|
|
0.84532374 0.83093525 0.82733813 0.8172043 ]
|
|
|
|
mean value: 0.8184110775895412
|
|
|
|
key: test_roc_auc
|
|
value: [0.70967742 0.70967742 0.75806452 0.72580645 0.80645161 0.79032258
|
|
0.69354839 0.75806452 0.72150538 0.72096774]
|
|
|
|
mean value: 0.7394086021505376
|
|
|
|
key: train_roc_auc
|
|
value: [0.85251799 0.85791367 0.82553957 0.8471223 0.8381295 0.8471223
|
|
0.85611511 0.85971223 0.85811351 0.85284675]
|
|
|
|
mean value: 0.849513292591733
|
|
|
|
key: test_jcc
|
|
value: [0.5 0.56097561 0.59459459 0.56410256 0.66666667 0.62857143
|
|
0.55813953 0.57142857 0.56410256 0.55263158]
|
|
|
|
mean value: 0.5761213113053576
|
|
|
|
key: train_jcc
|
|
value: [0.73801917 0.74098361 0.69303797 0.7284345 0.71153846 0.7266881
|
|
0.74603175 0.74757282 0.74433657 0.73548387]
|
|
|
|
mean value: 0.7312126821907436
|
|
|
|
MCC on Blind test: 0.17
|
|
|
|
Accuracy on Blind test: 0.55
|
|
|
|
Model_name: SVM
|
|
Model func: SVC(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SVC(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.03354144 0.02489185 0.02916217 0.02847791 0.02713418 0.02547169
|
|
0.02499604 0.025105 0.02454877 0.02502036]
|
|
|
|
mean value: 0.026834940910339354
|
|
|
|
key: score_time
|
|
value: [0.0131855 0.01280403 0.01284194 0.01289439 0.01388645 0.01286411
|
|
0.01271367 0.01277924 0.01253557 0.01256752]
|
|
|
|
mean value: 0.01290724277496338
|
|
|
|
key: test_mcc
|
|
value: [0.81325006 0.58834841 0.7190925 0.70116959 0.83914639 0.83914639
|
|
0.64820372 0.74348441 0.67314268 0.74460444]
|
|
|
|
mean value: 0.7309588598194308
|
|
|
|
key: train_mcc
|
|
value: [0.79601542 0.78762489 0.79209132 0.79002705 0.79151169 0.78818066
|
|
0.80264269 0.78357621 0.79775107 0.82936203]
|
|
|
|
mean value: 0.7958783038857233
|
|
|
|
key: test_accuracy
|
|
value: [0.90322581 0.79032258 0.85483871 0.83870968 0.91935484 0.91935484
|
|
0.82258065 0.87096774 0.83606557 0.86885246]
|
|
|
|
mean value: 0.8624272871496562
|
|
|
|
key: train_accuracy
|
|
value: [0.89568345 0.89208633 0.89388489 0.89388489 0.89388489 0.89208633
|
|
0.89928058 0.88848921 0.89587074 0.91382406]
|
|
|
|
mean value: 0.8958975369076373
|
|
|
|
key: test_fscore
|
|
value: [0.90909091 0.80597015 0.86567164 0.85714286 0.92063492 0.92063492
|
|
0.83076923 0.875 0.84375 0.875 ]
|
|
|
|
mean value: 0.8703664629317615
|
|
|
|
key: train_fscore
|
|
value: [0.90102389 0.89690722 0.8991453 0.89774697 0.89879931 0.89726027
|
|
0.90410959 0.89527027 0.90169492 0.91666667]
|
|
|
|
mean value: 0.9008624402594712
|
|
|
|
key: test_precision
|
|
value: [0.85714286 0.75 0.80555556 0.76923077 0.90625 0.90625
|
|
0.79411765 0.84848485 0.81818182 0.82352941]
|
|
|
|
mean value: 0.8278742907419379
|
|
|
|
key: train_precision
|
|
value: [0.85714286 0.85855263 0.85667752 0.86622074 0.85901639 0.85620915
|
|
0.8627451 0.84394904 0.8525641 0.88888889]
|
|
|
|
mean value: 0.860196642678534
|
|
|
|
key: test_recall
|
|
value: [0.96774194 0.87096774 0.93548387 0.96774194 0.93548387 0.93548387
|
|
0.87096774 0.90322581 0.87096774 0.93333333]
|
|
|
|
mean value: 0.9191397849462366
|
|
|
|
key: train_recall
|
|
value: [0.94964029 0.93884892 0.94604317 0.93165468 0.94244604 0.94244604
|
|
0.94964029 0.95323741 0.95683453 0.94623656]
|
|
|
|
mean value: 0.945702792604626
|
|
|
|
key: test_roc_auc
|
|
value: [0.90322581 0.79032258 0.85483871 0.83870968 0.91935484 0.91935484
|
|
0.82258065 0.87096774 0.83548387 0.86989247]
|
|
|
|
mean value: 0.86247311827957
|
|
|
|
key: train_roc_auc
|
|
value: [0.89568345 0.89208633 0.89388489 0.89388489 0.89388489 0.89208633
|
|
0.89928058 0.88848921 0.89597999 0.91376576]
|
|
|
|
mean value: 0.8959026327325236
|
|
|
|
key: test_jcc
|
|
value: [0.83333333 0.675 0.76315789 0.75 0.85294118 0.85294118
|
|
0.71052632 0.77777778 0.72972973 0.77777778]
|
|
|
|
mean value: 0.7723185182086111
|
|
|
|
key: train_jcc
|
|
value: [0.81987578 0.81308411 0.81677019 0.81446541 0.81619938 0.8136646
|
|
0.825 0.81039755 0.82098765 0.84615385]
|
|
|
|
mean value: 0.8196598510899469
|
|
|
|
MCC on Blind test: 0.22
|
|
|
|
Accuracy on Blind test: 0.49
|
|
|
|
Model_name: MLP
|
|
Model func: MLPClassifier(max_iter=500, random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MLPClassifier(max_iter=500, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.95948577 2.15034413 2.08079267 2.13337517 2.1066184 2.17374015
|
|
2.10629272 2.13330579 2.08265138 2.28587055]
|
|
|
|
mean value: 2.121247673034668
|
|
|
|
key: score_time
|
|
value: [0.01692438 0.01535654 0.01485014 0.02322173 0.02015829 0.01493502
|
|
0.02037382 0.01496053 0.02017426 0.02348351]
|
|
|
|
mean value: 0.018443822860717773
|
|
|
|
key: test_mcc
|
|
value: [0.93548387 0.87278605 0.93743687 0.87278605 0.96824584 0.90748521
|
|
0.77784447 0.93743687 0.80516731 0.81870035]
|
|
|
|
mean value: 0.883337288515988
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 0.99640932 0.99640932 1.
|
|
1. 1. 0.99284416 1. ]
|
|
|
|
mean value: 0.9985662806359359
|
|
|
|
key: test_accuracy
|
|
value: [0.96774194 0.93548387 0.96774194 0.93548387 0.98387097 0.9516129
|
|
0.88709677 0.96774194 0.90163934 0.90163934]
|
|
|
|
mean value: 0.9400052882072978
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 0.99820144 0.99820144 1.
|
|
1. 1. 0.99640934 1. ]
|
|
|
|
mean value: 0.9992812213424951
|
|
|
|
key: test_fscore
|
|
value: [0.96774194 0.9375 0.96666667 0.9375 0.98360656 0.94915254
|
|
0.88135593 0.96666667 0.9 0.88888889]
|
|
|
|
mean value: 0.9379079189659413
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 0.99820467 0.9981982 1.
|
|
1. 1. 0.99638989 1. ]
|
|
|
|
mean value: 0.9992792757758504
|
|
|
|
key: test_precision
|
|
value: [0.96774194 0.90909091 1. 0.90909091 1. 1.
|
|
0.92857143 1. 0.93103448 1. ]
|
|
|
|
mean value: 0.9645529664995738
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 0.99641577 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9996415770609319
|
|
|
|
key: test_recall
|
|
value: [0.96774194 0.96774194 0.93548387 0.96774194 0.96774194 0.90322581
|
|
0.83870968 0.93548387 0.87096774 0.8 ]
|
|
|
|
mean value: 0.915483870967742
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 0.99640288 1.
|
|
1. 1. 0.99280576 1. ]
|
|
|
|
mean value: 0.9989208633093525
|
|
|
|
key: test_roc_auc
|
|
value: [0.96774194 0.93548387 0.96774194 0.93548387 0.98387097 0.9516129
|
|
0.88709677 0.96774194 0.90215054 0.9 ]
|
|
|
|
mean value: 0.9398924731182796
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 0.99820144 0.99820144 1.
|
|
1. 1. 0.99640288 1. ]
|
|
|
|
mean value: 0.9992805755395684
|
|
|
|
key: test_jcc
|
|
value: [0.9375 0.88235294 0.93548387 0.88235294 0.96774194 0.90322581
|
|
0.78787879 0.93548387 0.81818182 0.8 ]
|
|
|
|
mean value: 0.8850201972284515
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 0.99641577 0.99640288 1.
|
|
1. 1. 0.99280576 1. ]
|
|
|
|
mean value: 0.9985624403702844
|
|
|
|
MCC on Blind test: 0.14
|
|
|
|
Accuracy on Blind test: 0.35
|
|
|
|
Model_name: Decision Tree
|
|
Model func: DecisionTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', DecisionTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02974105 0.01884413 0.02266383 0.02178121 0.02214408 0.02337813
|
|
0.02314329 0.02074575 0.01937366 0.0216651 ]
|
|
|
|
mean value: 0.0223480224609375
|
|
|
|
key: score_time
|
|
value: [0.01200747 0.00932431 0.00934696 0.00890899 0.00897503 0.00935054
|
|
0.00896859 0.00912809 0.00909376 0.0090301 ]
|
|
|
|
mean value: 0.009413385391235351
|
|
|
|
key: test_mcc
|
|
value: [1. 0.87096774 1. 0.90369611 1. 0.93743687
|
|
0.93743687 0.96824584 0.9344086 0.93635873]
|
|
|
|
mean value: 0.948855076082296
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [1. 0.93548387 1. 0.9516129 1. 0.96774194
|
|
0.96774194 0.98387097 0.96721311 0.96721311]
|
|
|
|
mean value: 0.9740877842411423
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [1. 0.93548387 1. 0.95238095 1. 0.96666667
|
|
0.96666667 0.98360656 0.96774194 0.96551724]
|
|
|
|
mean value: 0.9738063890922258
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 0.93548387 1. 0.9375 1. 1.
|
|
1. 1. 0.96774194 1. ]
|
|
|
|
mean value: 0.9840725806451613
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 0.93548387 1. 0.96774194 1. 0.93548387
|
|
0.93548387 0.96774194 0.96774194 0.93333333]
|
|
|
|
mean value: 0.9643010752688173
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [1. 0.93548387 1. 0.9516129 1. 0.96774194
|
|
0.96774194 0.98387097 0.9672043 0.96666667]
|
|
|
|
mean value: 0.9740322580645162
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [1. 0.87878788 1. 0.90909091 1. 0.93548387
|
|
0.93548387 0.96774194 0.9375 0.93333333]
|
|
|
|
mean value: 0.9497421798631476
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.01
|
|
|
|
Accuracy on Blind test: 0.2
|
|
|
|
Model_name: Extra Trees
|
|
Model func: ExtraTreesClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreesClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.12657785 0.12585592 0.12568116 0.1251936 0.12818575 0.12904644
|
|
0.12710953 0.12752652 0.12936378 0.12759137]
|
|
|
|
mean value: 0.127213191986084
|
|
|
|
key: score_time
|
|
value: [0.0178988 0.01849961 0.01826978 0.0182507 0.01817966 0.01839781
|
|
0.01924253 0.01940107 0.01964402 0.0183084 ]
|
|
|
|
mean value: 0.018609237670898438
|
|
|
|
key: test_mcc
|
|
value: [0.90369611 0.80813523 0.90748521 0.90369611 1. 0.90748521
|
|
0.83914639 0.87278605 0.83655914 0.96770777]
|
|
|
|
mean value: 0.8946697236305513
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.9516129 0.90322581 0.9516129 0.9516129 1. 0.9516129
|
|
0.91935484 0.93548387 0.91803279 0.98360656]
|
|
|
|
mean value: 0.9466155473294553
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.95238095 0.90625 0.95384615 0.95238095 1. 0.94915254
|
|
0.92063492 0.9375 0.91803279 0.98305085]
|
|
|
|
mean value: 0.9473229155958733
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.9375 0.87878788 0.91176471 0.9375 1. 1.
|
|
0.90625 0.90909091 0.93333333 1. ]
|
|
|
|
mean value: 0.9414226827094474
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.96774194 0.93548387 1. 0.96774194 1. 0.90322581
|
|
0.93548387 0.96774194 0.90322581 0.96666667]
|
|
|
|
mean value: 0.9547311827956989
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.9516129 0.90322581 0.9516129 0.9516129 1. 0.9516129
|
|
0.91935484 0.93548387 0.91827957 0.98333333]
|
|
|
|
mean value: 0.9466129032258065
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.90909091 0.82857143 0.91176471 0.90909091 1. 0.90322581
|
|
0.85294118 0.88235294 0.84848485 0.96666667]
|
|
|
|
mean value: 0.9012189391885786
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.16
|
|
|
|
Accuracy on Blind test: 0.36
|
|
|
|
Model_name: Extra Tree
|
|
Model func: ExtraTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01068401 0.01052618 0.0105834 0.01052594 0.0105145 0.01098371
|
|
0.01058984 0.010818 0.01097584 0.01046133]
|
|
|
|
mean value: 0.010666275024414062
|
|
|
|
key: score_time
|
|
value: [0.00883293 0.00899005 0.00913048 0.00909328 0.00905633 0.00895715
|
|
0.00912786 0.008919 0.00896478 0.00893164]
|
|
|
|
mean value: 0.009000349044799804
|
|
|
|
key: test_mcc
|
|
value: [0.55895656 0.77784447 0.49319696 0.65372045 0.77784447 0.7190925
|
|
0.75623534 0.80645161 0.64895138 0.90586325]
|
|
|
|
mean value: 0.7098156990531378
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.77419355 0.88709677 0.74193548 0.82258065 0.88709677 0.85483871
|
|
0.87096774 0.90322581 0.81967213 0.95081967]
|
|
|
|
mean value: 0.8512427287149656
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.75 0.89230769 0.71428571 0.80701754 0.88135593 0.84210526
|
|
0.88235294 0.90322581 0.80701754 0.94736842]
|
|
|
|
mean value: 0.8427036858354704
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.84 0.85294118 0.8 0.88461538 0.92857143 0.92307692
|
|
0.81081081 0.90322581 0.88461538 1. ]
|
|
|
|
mean value: 0.8827856914612133
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.67741935 0.93548387 0.64516129 0.74193548 0.83870968 0.77419355
|
|
0.96774194 0.90322581 0.74193548 0.9 ]
|
|
|
|
mean value: 0.8125806451612904
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.77419355 0.88709677 0.74193548 0.82258065 0.88709677 0.85483871
|
|
0.87096774 0.90322581 0.82096774 0.95 ]
|
|
|
|
mean value: 0.8512903225806452
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.6 0.80555556 0.55555556 0.67647059 0.78787879 0.72727273
|
|
0.78947368 0.82352941 0.67647059 0.9 ]
|
|
|
|
mean value: 0.7342206898708447
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.1
|
|
|
|
Accuracy on Blind test: 0.43
|
|
|
|
Model_name: Random Forest
|
|
Model func: RandomForestClassifier(n_estimators=1000, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(n_estimators=1000, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.87444687 1.84364486 1.88241863 1.88341451 1.86338425 1.8370254
|
|
1.95046043 2.00012541 2.00316405 2.00365019]
|
|
|
|
mean value: 1.914173460006714
|
|
|
|
key: score_time
|
|
value: [0.09367371 0.09850073 0.09885931 0.09624052 0.09289575 0.09235954
|
|
0.10137892 0.10190368 0.10188222 0.10096431]
|
|
|
|
mean value: 0.0978658676147461
|
|
|
|
key: test_mcc
|
|
value: [1. 0.90369611 1. 0.90369611 1. 1.
|
|
0.90369611 0.96824584 0.93635873 0.93635873]
|
|
|
|
mean value: 0.9552051644792718
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [1. 0.9516129 1. 0.9516129 1. 1.
|
|
0.9516129 0.98387097 0.96721311 0.96721311]
|
|
|
|
mean value: 0.9773135906927551
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [1. 0.95238095 1. 0.95238095 1. 1.
|
|
0.95081967 0.98360656 0.96875 0.96551724]
|
|
|
|
mean value: 0.9773455375649411
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 0.9375 1. 0.9375 1. 1.
|
|
0.96666667 1. 0.93939394 1. ]
|
|
|
|
mean value: 0.9781060606060606
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 0.96774194 1. 0.96774194 1. 1.
|
|
0.93548387 0.96774194 1. 0.93333333]
|
|
|
|
mean value: 0.9772043010752688
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [1. 0.9516129 1. 0.9516129 1. 1.
|
|
0.9516129 0.98387097 0.96666667 0.96666667]
|
|
|
|
mean value: 0.9772043010752689
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [1. 0.90909091 1. 0.90909091 1. 1.
|
|
0.90625 0.96774194 0.93939394 0.93333333]
|
|
|
|
mean value: 0.9564901026392962
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.08
|
|
|
|
Accuracy on Blind test: 0.23
|
|
|
|
Model_name: Random Forest2
|
|
Model func: RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000...05', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10,
|
|
oob_score=True, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.04206729 1.01224828 1.05134225 1.03499246 0.99280858 1.02267337
|
|
0.99256825 0.97221684 1.08185887 1.01412582]
|
|
|
|
mean value: 1.0216902017593383
|
|
|
|
key: score_time
|
|
value: [0.19844055 0.23611355 0.22823763 0.22109938 0.14304137 0.20978165
|
|
0.21308875 0.23787618 0.25533915 0.21346045]
|
|
|
|
mean value: 0.21564786434173583
|
|
|
|
key: test_mcc
|
|
value: [1. 0.83914639 0.96824584 0.87831007 1. 1.
|
|
0.90369611 0.96824584 0.90204573 0.8688172 ]
|
|
|
|
mean value: 0.9328507181352339
|
|
|
|
key: train_mcc
|
|
value: [0.98207157 0.98202074 0.97132357 0.98563702 0.97844259 0.97844259
|
|
0.97497785 0.98207157 0.97848145 0.98210326]
|
|
|
|
mean value: 0.9795572214421221
|
|
|
|
key: test_accuracy
|
|
value: [1. 0.91935484 0.98387097 0.93548387 1. 1.
|
|
0.9516129 0.98387097 0.95081967 0.93442623]
|
|
|
|
mean value: 0.9659439450026441
|
|
|
|
key: train_accuracy
|
|
value: [0.99100719 0.99100719 0.98561151 0.99280576 0.98920863 0.98920863
|
|
0.98741007 0.99100719 0.98922801 0.99102334]
|
|
|
|
mean value: 0.9897517533549461
|
|
|
|
key: test_fscore
|
|
value: [1. 0.92063492 0.98360656 0.93939394 1. 1.
|
|
0.95081967 0.98360656 0.95238095 0.93333333]
|
|
|
|
mean value: 0.9663775932628391
|
|
|
|
key: train_fscore
|
|
value: [0.99105546 0.99102334 0.98571429 0.99283154 0.98924731 0.98924731
|
|
0.98752228 0.99105546 0.98924731 0.99108734]
|
|
|
|
mean value: 0.9898031639746488
|
|
|
|
key: test_precision
|
|
value: [1. 0.90625 1. 0.88571429 1. 1.
|
|
0.96666667 1. 0.9375 0.93333333]
|
|
|
|
mean value: 0.9629464285714285
|
|
|
|
key: train_precision
|
|
value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
[0.98576512 0.98924731 0.9787234 0.98928571 0.98571429 0.98571429
|
|
0.97879859 0.98576512 0.98571429 0.9858156 ]
|
|
|
|
mean value: 0.9850543726031485
|
|
|
|
key: test_recall
|
|
value: [1. 0.93548387 0.96774194 1. 1. 1.
|
|
0.93548387 0.96774194 0.96774194 0.93333333]
|
|
|
|
mean value: 0.970752688172043
|
|
|
|
key: train_recall
|
|
value: [0.99640288 0.99280576 0.99280576 0.99640288 0.99280576 0.99280576
|
|
0.99640288 0.99640288 0.99280576 0.99641577]
|
|
|
|
mean value: 0.9946056058379104
|
|
|
|
key: test_roc_auc
|
|
value: [1. 0.91935484 0.98387097 0.93548387 1. 1.
|
|
0.9516129 0.98387097 0.95053763 0.9344086 ]
|
|
|
|
mean value: 0.9659139784946237
|
|
|
|
key: train_roc_auc
|
|
value: [0.99100719 0.99100719 0.98561151 0.99280576 0.98920863 0.98920863
|
|
0.98741007 0.99100719 0.98923442 0.99101364]
|
|
|
|
mean value: 0.9897514246667182
|
|
|
|
key: test_jcc
|
|
value: [1. 0.85294118 0.96774194 0.88571429 1. 1.
|
|
0.90625 0.96774194 0.90909091 0.875 ]
|
|
|
|
mean value: 0.9364480242243525
|
|
|
|
key: train_jcc
|
|
value: [0.9822695 0.98220641 0.97183099 0.98576512 0.9787234 0.9787234
|
|
0.97535211 0.9822695 0.9787234 0.98233216]
|
|
|
|
mean value: 0.9798196004175848
|
|
|
|
MCC on Blind test: 0.12
|
|
|
|
Accuracy on Blind test: 0.26
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01132822 0.01071429 0.01171923 0.0117836 0.01190042 0.01191497
|
|
0.01188326 0.01205683 0.01197553 0.01189804]
|
|
|
|
mean value: 0.011717438697814941
|
|
|
|
key: score_time
|
|
value: [0.0150733 0.0090642 0.00976753 0.00985432 0.00976515 0.00981903
|
|
0.00985765 0.0098536 0.00985885 0.00983262]
|
|
|
|
mean value: 0.010274624824523926
|
|
|
|
key: test_mcc
|
|
value: [0.64820372 0.55301004 0.5809475 0.49319696 0.74348441 0.67883359
|
|
0.58338335 0.58338335 0.57419355 0.73763441]
|
|
|
|
mean value: 0.6176270892557514
|
|
|
|
key: train_mcc
|
|
value: [0.6587893 0.67364319 0.6549475 0.6870548 0.6618705 0.68389584
|
|
0.67364319 0.68084793 0.67053524 0.65891743]
|
|
|
|
mean value: 0.6704144922517364
|
|
|
|
key: test_accuracy
|
|
value: [0.82258065 0.77419355 0.79032258 0.74193548 0.87096774 0.83870968
|
|
0.79032258 0.79032258 0.78688525 0.86885246]
|
|
|
|
mean value: 0.807509254362771
|
|
|
|
key: train_accuracy
|
|
value: [0.82913669 0.83633094 0.82733813 0.84352518 0.83093525 0.84172662
|
|
0.83633094 0.83992806 0.83482944 0.82944345]
|
|
|
|
mean value: 0.8349524689045891
|
|
|
|
key: test_fscore
|
|
value: [0.81355932 0.78787879 0.79365079 0.76470588 0.86666667 0.83333333
|
|
0.8 0.8 0.78688525 0.86666667]
|
|
|
|
mean value: 0.8113346698484727
|
|
|
|
key: train_fscore
|
|
value: [0.8324515 0.84063047 0.82978723 0.8438061 0.83093525 0.84452297
|
|
0.84063047 0.8441331 0.83859649 0.83065954]
|
|
|
|
mean value: 0.8376153130590535
|
|
|
|
key: test_precision
|
|
value: [0.85714286 0.74285714 0.78125 0.7027027 0.89655172 0.86206897
|
|
0.76470588 0.76470588 0.8 0.86666667]
|
|
|
|
mean value: 0.8038651823730424
|
|
|
|
key: train_precision
|
|
value: [0.816609 0.81911263 0.81818182 0.84229391 0.83093525 0.82986111
|
|
0.81911263 0.8225256 0.81849315 0.82624113]
|
|
|
|
mean value: 0.8243366223120344
|
|
|
|
key: test_recall
|
|
value: [0.77419355 0.83870968 0.80645161 0.83870968 0.83870968 0.80645161
|
|
0.83870968 0.83870968 0.77419355 0.86666667]
|
|
|
|
mean value: 0.8221505376344086
|
|
|
|
key: train_recall
|
|
value: [0.84892086 0.86330935 0.84172662 0.84532374 0.83093525 0.85971223
|
|
0.86330935 0.86690647 0.85971223 0.83512545]
|
|
|
|
mean value: 0.8514981563136588
|
|
|
|
key: test_roc_auc
|
|
value: [0.82258065 0.77419355 0.79032258 0.74193548 0.87096774 0.83870968
|
|
0.79032258 0.79032258 0.78709677 0.8688172 ]
|
|
|
|
mean value: 0.8075268817204301
|
|
|
|
key: train_roc_auc
|
|
value: [0.82913669 0.83633094 0.82733813 0.84352518 0.83093525 0.84172662
|
|
0.83633094 0.83992806 0.83487404 0.82943323]
|
|
|
|
mean value: 0.8349559062427477
|
|
|
|
key: test_jcc
|
|
value: [0.68571429 0.65 0.65789474 0.61904762 0.76470588 0.71428571
|
|
0.66666667 0.66666667 0.64864865 0.76470588]
|
|
|
|
mean value: 0.6838336102577589
|
|
|
|
key: train_jcc
|
|
value: [0.71299094 0.72507553 0.70909091 0.72981366 0.71076923 0.73088685
|
|
0.72507553 0.73030303 0.72205438 0.71036585]
|
|
|
|
mean value: 0.7206425913193242
|
|
|
|
MCC on Blind test: 0.19
|
|
|
|
Accuracy on Blind test: 0.52
|
|
|
|
Model_name: XGBoost
|
|
Model func: XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000...
|
|
interaction_constraints=None, learning_rate=None,
|
|
max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan,
|
|
monotone_constraints=None, n_estimators=100,
|
|
n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None,
|
|
reg_lambda=None, scale_pos_weight=None,
|
|
subsample=None, tree_method=None,
|
|
use_label_encoder=False,
|
|
validate_parameters=None, verbosity=0))])
|
|
|
|
key: fit_time
|
|
value: [0.09153771 0.06604338 0.07178664 0.0755806 0.09721923 0.07113528
|
|
0.07501578 0.08107591 0.09011579 0.07580256]
|
|
|
|
mean value: 0.07953128814697266
|
|
|
|
key: score_time
|
|
value: [0.01134396 0.01080441 0.01218295 0.01085663 0.01143789 0.01116538
|
|
0.01172423 0.01230764 0.01139951 0.01124096]
|
|
|
|
mean value: 0.011446356773376465
|
|
|
|
key: test_mcc
|
|
value: [1. 0.93548387 1. 0.96824584 0.96824584 0.93743687
|
|
1. 0.93743687 1. 0.93635873]
|
|
|
|
mean value: 0.9683208010141471
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [1. 0.96774194 1. 0.98387097 0.98387097 0.96774194
|
|
1. 0.96774194 1. 0.96721311]
|
|
|
|
mean value: 0.9838180856689582
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [1. 0.96774194 1. 0.98412698 0.98360656 0.96666667
|
|
1. 0.96666667 1. 0.96551724]
|
|
|
|
mean value: 0.9834326051700548
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 0.96774194 1. 0.96875 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9936491935483871
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 0.96774194 1. 1. 0.96774194 0.93548387
|
|
1. 0.93548387 1. 0.93333333]
|
|
|
|
mean value: 0.9739784946236559
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [1. 0.96774194 1. 0.98387097 0.98387097 0.96774194
|
|
1. 0.96774194 1. 0.96666667]
|
|
|
|
mean value: 0.983763440860215
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [1. 0.9375 1. 0.96875 0.96774194 0.93548387
|
|
1. 0.93548387 1. 0.93333333]
|
|
|
|
mean value: 0.9678293010752688
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.08
|
|
|
|
Accuracy on Blind test: 0.2
|
|
|
|
Model_name: LDA
|
|
Model func: LinearDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LinearDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.04852033 0.05345178 0.04365349 0.07337523 0.0658989 0.07849956
|
|
0.07661462 0.05499148 0.07248783 0.0457654 ]
|
|
|
|
mean value: 0.06132586002349853
|
|
|
|
key: score_time
|
|
value: [0.01924372 0.01231289 0.01934409 0.01247478 0.02116656 0.01962399
|
|
0.01251864 0.01944685 0.01242089 0.01249909]
|
|
|
|
mean value: 0.016105151176452635
|
|
|
|
key: test_mcc
|
|
value: [0.83914639 0.87096774 1. 0.77784447 0.96824584 0.87278605
|
|
0.90369611 0.87278605 0.9344086 0.73763441]
|
|
|
|
mean value: 0.8777515659651098
|
|
|
|
key: train_mcc
|
|
value: [0.95705746 0.96412858 0.94283651 0.96058703 0.94634322 0.96048758
|
|
0.94266562 0.95353974 0.96419362 0.9679883 ]
|
|
|
|
mean value: 0.9559827674087997
|
|
|
|
key: test_accuracy
|
|
value: [0.91935484 0.93548387 1. 0.88709677 0.98387097 0.93548387
|
|
0.9516129 0.93548387 0.96721311 0.86885246]
|
|
|
|
mean value: 0.9384452670544685
|
|
|
|
key: train_accuracy
|
|
value: [0.97841727 0.98201439 0.97122302 0.98021583 0.97302158 0.98021583
|
|
0.97122302 0.97661871 0.98204668 0.98384201]
|
|
|
|
mean value: 0.977883832969531
|
|
|
|
key: test_fscore
|
|
value: [0.91803279 0.93548387 1. 0.89230769 0.98360656 0.93333333
|
|
0.95081967 0.93333333 0.96774194 0.86666667]
|
|
|
|
mean value: 0.9381325848486082
|
|
|
|
key: train_fscore
|
|
value: [0.97864769 0.98214286 0.97163121 0.98039216 0.97335702 0.980322
|
|
0.97153025 0.97690941 0.98214286 0.9840708 ]
|
|
|
|
mean value: 0.9781146242643416
|
|
|
|
key: test_precision
|
|
value: [0.93333333 0.93548387 1. 0.85294118 1. 0.96551724
|
|
0.96666667 0.96551724 0.96774194 0.86666667]
|
|
|
|
mean value: 0.9453868132347488
|
|
|
|
key: train_precision
|
|
value: [0.96830986 0.9751773 0.95804196 0.97173145 0.96140351 0.97508897
|
|
0.96126761 0.96491228 0.9751773 0.97202797]
|
|
|
|
mean value: 0.9683138210996206
|
|
|
|
key: test_recall
|
|
value: [0.90322581 0.93548387 1. 0.93548387 0.96774194 0.90322581
|
|
0.93548387 0.90322581 0.96774194 0.86666667]
|
|
|
|
mean value: 0.9318279569892473
|
|
|
|
key: train_recall
|
|
value: [0.98920863 0.98920863 0.98561151 0.98920863 0.98561151 0.98561151
|
|
0.98201439 0.98920863 0.98920863 0.99641577]
|
|
|
|
mean value: 0.9881307856940253
|
|
|
|
key: test_roc_auc
|
|
value: [0.91935484 0.93548387 1. 0.88709677 0.98387097 0.93548387
|
|
0.9516129 0.93548387 0.9672043 0.8688172 ]
|
|
|
|
mean value: 0.9384408602150538
|
|
|
|
key: train_roc_auc
|
|
value: [0.97841727 0.98201439 0.97122302 0.98021583 0.97302158 0.98021583
|
|
0.97122302 0.97661871 0.98205951 0.9838194 ]
|
|
|
|
mean value: 0.9778828550063176
|
|
|
|
key: test_jcc
|
|
value: [0.84848485 0.87878788 1. 0.80555556 0.96774194 0.875
|
|
0.90625 0.875 0.9375 0.76470588]
|
|
|
|
mean value: 0.8859026100665095
|
|
|
|
key: train_jcc
|
|
value: [0.95818815 0.96491228 0.94482759 0.96153846 0.94809689 0.96140351
|
|
0.94463668 0.95486111 0.96491228 0.96864111]
|
|
|
|
mean value: 0.9572018061338432
|
|
|
|
MCC on Blind test: 0.13
|
|
|
|
Accuracy on Blind test: 0.37
|
|
|
|
Model_name: Multinomial
|
|
Model func: MultinomialNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MultinomialNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01502514 0.01342988 0.01100564 0.0116806 0.01108813 0.01071095
|
|
0.01139832 0.01138282 0.01146936 0.01143527]
|
|
|
|
mean value: 0.011862611770629883
|
|
|
|
key: score_time
|
|
value: [0.0120914 0.01019168 0.00957203 0.00933599 0.00964856 0.00924063
|
|
0.00965118 0.00963283 0.0095036 0.00960135]
|
|
|
|
mean value: 0.009846925735473633
|
|
|
|
key: test_mcc
|
|
value: [0.67741935 0.54953196 0.67741935 0.59603956 0.74193548 0.77784447
|
|
0.51639778 0.58834841 0.54086022 0.8403496 ]
|
|
|
|
mean value: 0.6506146178904506
|
|
|
|
key: train_mcc
|
|
value: [0.63097179 0.67116969 0.64951905 0.64509217 0.6522999 0.64916414
|
|
0.66359001 0.65576325 0.64576598 0.62037396]
|
|
|
|
mean value: 0.6483709942789844
|
|
|
|
key: test_accuracy
|
|
value: [0.83870968 0.77419355 0.83870968 0.79032258 0.87096774 0.88709677
|
|
0.75806452 0.79032258 0.7704918 0.91803279]
|
|
|
|
mean value: 0.8236911686938128
|
|
|
|
key: train_accuracy
|
|
value: [0.8147482 0.83453237 0.82374101 0.82194245 0.82553957 0.82374101
|
|
0.83093525 0.82733813 0.82226212 0.80969479]
|
|
|
|
mean value: 0.8234474897640236
|
|
|
|
key: test_fscore
|
|
value: [0.83870968 0.78125 0.83870968 0.8115942 0.87096774 0.89230769
|
|
0.76190476 0.80597015 0.77419355 0.92063492]
|
|
|
|
mean value: 0.8296242372160947
|
|
|
|
key: train_fscore
|
|
value: [0.82086957 0.84083045 0.83044983 0.82722513 0.83071553 0.82986111
|
|
0.83680556 0.83216783 0.82722513 0.81533101]
|
|
|
|
mean value: 0.8291481145387779
|
|
|
|
key: test_precision
|
|
value: [0.83870968 0.75757576 0.83870968 0.73684211 0.87096774 0.85294118
|
|
0.75 0.75 0.77419355 0.87878788]
|
|
|
|
mean value: 0.8048727563258673
|
|
|
|
key: train_precision
|
|
value: [0.79461279 0.81 0.8 0.80338983 0.80677966 0.80201342
|
|
0.80872483 0.80952381 0.80338983 0.79322034]
|
|
|
|
mean value: 0.803165452018711
|
|
|
|
key: test_recall
|
|
value: [0.83870968 0.80645161 0.83870968 0.90322581 0.87096774 0.93548387
|
|
0.77419355 0.87096774 0.77419355 0.96666667]
|
|
|
|
mean value: 0.8579569892473118
|
|
|
|
key: train_recall
|
|
value: [0.84892086 0.87410072 0.86330935 0.85251799 0.85611511 0.85971223
|
|
0.86690647 0.85611511 0.85251799 0.83870968]
|
|
|
|
mean value: 0.8568925504757484
|
|
|
|
key: test_roc_auc
|
|
value: [0.83870968 0.77419355 0.83870968 0.79032258 0.87096774 0.88709677
|
|
0.75806452 0.79032258 0.77043011 0.9188172 ]
|
|
|
|
mean value: 0.8237634408602151
|
|
|
|
key: train_roc_auc
|
|
value: [0.8147482 0.83453237 0.82374101 0.82194245 0.82553957 0.82374101
|
|
0.83093525 0.82733813 0.82231634 0.80964261]
|
|
|
|
mean value: 0.8234476934581367
|
|
|
|
key: test_jcc
|
|
value: [0.72222222 0.64102564 0.72222222 0.68292683 0.77142857 0.80555556
|
|
0.61538462 0.675 0.63157895 0.85294118]
|
|
|
|
mean value: 0.712028578094613
|
|
|
|
key: train_jcc
|
|
value: [0.69616519 0.72537313 0.71005917 0.70535714 0.71044776 0.70919881
|
|
0.71940299 0.71257485 0.70535714 0.68823529]
|
|
|
|
mean value: 0.7082171487122775
|
|
|
|
MCC on Blind test: 0.19
|
|
|
|
Accuracy on Blind test: 0.55
|
|
|
|
Model_name: Passive Aggresive
|
|
Model func: PassiveAggressiveClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
PassiveAggressiveClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.0255146 0.02991891 0.02861214 0.0264709 0.0301187 0.02686048
|
|
0.03000832 0.03060079 0.03761053 0.03682876]
|
|
|
|
mean value: 0.030254411697387695
|
|
|
|
key: score_time
|
|
value: [0.0106895 0.01152873 0.01216507 0.01219368 0.01213622 0.01212597
|
|
0.01233673 0.0122211 0.01223564 0.01243973]
|
|
|
|
mean value: 0.01200723648071289
|
|
|
|
key: test_mcc
|
|
value: [0.90748521 0.87096774 0.93743687 0.90369611 0.93743687 0.87278605
|
|
0.90748521 0.82199494 0.87613871 0.90204573]
|
|
|
|
mean value: 0.8937473447150879
|
|
|
|
key: train_mcc
|
|
value: [0.94653932 0.94986154 0.95329292 0.93644001 0.92580909 0.88612956
|
|
0.93987712 0.8782527 0.95034654 0.97487172]
|
|
|
|
mean value: 0.9341420499730175
|
|
|
|
key: test_accuracy
|
|
value: [0.9516129 0.93548387 0.96774194 0.9516129 0.96774194 0.93548387
|
|
0.9516129 0.90322581 0.93442623 0.95081967]
|
|
|
|
mean value: 0.9449762030671602
|
|
|
|
key: train_accuracy
|
|
value: [0.97302158 0.97482014 0.97661871 0.9676259 0.96223022 0.94064748
|
|
0.96942446 0.93705036 0.97486535 0.98743268]
|
|
|
|
mean value: 0.9663736874055513
|
|
|
|
key: test_fscore
|
|
value: [0.94915254 0.93548387 0.96666667 0.95238095 0.96875 0.9375
|
|
0.95384615 0.89285714 0.93939394 0.94915254]
|
|
|
|
mean value: 0.944518381085836
|
|
|
|
key: train_fscore
|
|
value: [0.9725777 0.97454545 0.97649186 0.96678967 0.96322242 0.94358974
|
|
0.97012302 0.93383743 0.97526502 0.98743268]
|
|
|
|
mean value: 0.9663874986610166
|
|
|
|
key: test_precision
|
|
value: [1. 0.93548387 1. 0.9375 0.93939394 0.90909091
|
|
0.91176471 1. 0.88571429 0.96551724]
|
|
|
|
mean value: 0.948446495242854
|
|
|
|
key: train_precision
|
|
value: [0.98884758 0.98529412 0.98181818 0.99242424 0.93856655 0.8990228
|
|
0.94845361 0.98406375 0.95833333 0.98920863]
|
|
|
|
mean value: 0.9666032799430763
|
|
|
|
key: test_recall
|
|
value: [0.90322581 0.93548387 0.93548387 0.96774194 1. 0.96774194
|
|
1. 0.80645161 1. 0.93333333]
|
|
|
|
mean value: 0.9449462365591398
|
|
|
|
key: train_recall
|
|
value: [0.95683453 0.96402878 0.97122302 0.94244604 0.98920863 0.99280576
|
|
0.99280576 0.88848921 0.99280576 0.98566308]
|
|
|
|
mean value: 0.9676310564451664
|
|
|
|
key: test_roc_auc
|
|
value: [0.9516129 0.93548387 0.96774194 0.9516129 0.96774194 0.93548387
|
|
0.9516129 0.90322581 0.93333333 0.95053763]
|
|
|
|
mean value: 0.9448387096774193
|
|
|
|
key: train_roc_auc
|
|
value: [0.97302158 0.97482014 0.97661871 0.9676259 0.96223022 0.94064748
|
|
0.96942446 0.93705036 0.9748975 0.98743586]
|
|
|
|
mean value: 0.9663772208040019
|
|
|
|
key: test_jcc
|
|
value: [0.90322581 0.87878788 0.93548387 0.90909091 0.93939394 0.88235294
|
|
0.91176471 0.80645161 0.88571429 0.90322581]
|
|
|
|
mean value: 0.895549175682003
|
|
|
|
key: train_jcc
|
|
value: [0.94661922 0.95035461 0.9540636 0.93571429 0.92905405 0.89320388
|
|
0.94197952 0.87588652 0.95172414 0.9751773 ]
|
|
|
|
mean value: 0.9353777144417266
|
|
|
|
MCC on Blind test: 0.17
|
|
|
|
Accuracy on Blind test: 0.49
|
|
|
|
Model_name: Stochastic GDescent
|
|
Model func: SGDClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SGDClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01973319 0.01927781 0.02288127 0.03501034 0.02319169 0.01984477
|
|
0.02170777 0.02149868 0.0256052 0.02394271]
|
|
|
|
mean value: 0.023269343376159667
|
|
|
|
key: score_time
|
|
value: [0.01226687 0.01212335 0.01218319 0.01252127 0.01216912 0.01212978
|
|
0.01235199 0.01232243 0.01220965 0.01219463]
|
|
|
|
mean value: 0.012247228622436523
|
|
|
|
key: test_mcc
|
|
value: [0.90748521 0.7130241 0.90748521 0.90748521 0.93743687 0.67419986
|
|
0.78446454 0.81325006 0.81870035 0.80983045]
|
|
|
|
mean value: 0.827336186938833
|
|
|
|
key: train_mcc
|
|
value: [0.96425338 0.88767604 0.89932729 0.96405373 0.83603758 0.77906124
|
|
0.91078521 0.78665297 0.8371636 0.90900083]
|
|
|
|
mean value: 0.8774011859965327
|
|
|
|
key: test_accuracy
|
|
value: [0.9516129 0.85483871 0.9516129 0.9516129 0.96774194 0.82258065
|
|
0.88709677 0.90322581 0.90163934 0.90163934]
|
|
|
|
mean value: 0.9093601269169752
|
|
|
|
key: train_accuracy
|
|
value: [0.98201439 0.94244604 0.94784173 0.98201439 0.91366906 0.87769784
|
|
0.95503597 0.88309353 0.91202873 0.95332136]
|
|
|
|
mean value: 0.9349163039406895
|
|
|
|
key: test_fscore
|
|
value: [0.95384615 0.84745763 0.95384615 0.95384615 0.96666667 0.84507042
|
|
0.87719298 0.90909091 0.91176471 0.89285714]
|
|
|
|
mean value: 0.9111638918145528
|
|
|
|
key: train_fscore
|
|
value: [0.98220641 0.94007491 0.95008606 0.98194946 0.90697674 0.89102564
|
|
0.95412844 0.89499192 0.91900826 0.95167286]
|
|
|
|
mean value: 0.9372120704015114
|
|
|
|
key: test_precision
|
|
value: [0.91176471 0.89285714 0.91176471 0.91176471 1. 0.75
|
|
0.96153846 0.85714286 0.83783784 0.96153846]
|
|
|
|
mean value: 0.899620887856182
|
|
|
|
key: train_precision
|
|
value: [0.97183099 0.98046875 0.91089109 0.98550725 0.98319328 0.80346821
|
|
0.97378277 0.81231672 0.85015291 0.98841699]
|
|
|
|
mean value: 0.9260028937498493
|
|
|
|
key: test_recall
|
|
value: [1. 0.80645161 1. 1. 0.93548387 0.96774194
|
|
0.80645161 0.96774194 1. 0.83333333]
|
|
|
|
mean value: 0.9317204301075269
|
|
|
|
key: train_recall
|
|
value: [0.99280576 0.9028777 0.99280576 0.97841727 0.84172662 1.
|
|
0.9352518 0.99640288 1. 0.91756272]
|
|
|
|
mean value: 0.955785049379851
|
|
|
|
key: test_roc_auc
|
|
value: [0.9516129 0.85483871 0.9516129 0.9516129 0.96774194 0.82258065
|
|
0.88709677 0.90322581 0.9 0.90053763]
|
|
|
|
mean value: 0.9090860215053763
|
|
|
|
key: train_roc_auc
|
|
value: [0.98201439 0.94244604 0.94784173 0.98201439 0.91366906 0.87769784
|
|
0.95503597 0.88309353 0.91218638 0.95338568]
|
|
|
|
mean value: 0.9349385008122534
|
|
|
|
key: test_jcc
|
|
value: [0.91176471 0.73529412 0.91176471 0.91176471 0.93548387 0.73170732
|
|
0.78125 0.83333333 0.83783784 0.80645161]
|
|
|
|
mean value: 0.8396652207409427
|
|
|
|
key: train_jcc
|
|
value: [0.96503497 0.8869258 0.90491803 0.96453901 0.82978723 0.80346821
|
|
0.9122807 0.80994152 0.85015291 0.90780142]
|
|
|
|
mean value: 0.8834849787962806
|
|
|
|
MCC on Blind test: 0.21
|
|
|
|
Accuracy on Blind test: 0.57
|
|
|
|
Model_name: AdaBoost Classifier
|
|
Model func: AdaBoostClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', AdaBoostClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.21634364 0.19705439 0.20209169 0.20247054 0.18944693 0.18858314
|
|
0.19010186 0.1894753 0.19017911 0.19933033]
|
|
|
|
mean value: 0.19650769233703613
|
|
|
|
key: score_time
|
|
value: [0.01674056 0.01665401 0.01689458 0.01660991 0.01544285 0.01575398
|
|
0.01562691 0.01547384 0.01630163 0.01683712]
|
|
|
|
mean value: 0.016233539581298827
|
|
|
|
key: test_mcc
|
|
value: [1. 0.93743687 0.96824584 0.90369611 1. 0.96824584
|
|
0.87278605 0.93743687 0.96770777 0.96770777]
|
|
|
|
mean value: 0.9523263113907507
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [1. 0.96774194 0.98387097 0.9516129 1. 0.98387097
|
|
0.93548387 0.96774194 0.98360656 0.98360656]
|
|
|
|
mean value: 0.975753569539926
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [1. 0.96875 0.98360656 0.95238095 1. 0.98360656
|
|
0.93333333 0.96666667 0.98412698 0.98305085]
|
|
|
|
mean value: 0.9755521898719661
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 0.93939394 1. 0.9375 1. 1.
|
|
0.96551724 1. 0.96875 1. ]
|
|
|
|
mean value: 0.981116118077325
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 0.96774194 0.96774194 1. 0.96774194
|
|
0.90322581 0.93548387 1. 0.96666667]
|
|
|
|
mean value: 0.9708602150537634
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [1. 0.96774194 0.98387097 0.9516129 1. 0.98387097
|
|
0.93548387 0.96774194 0.98333333 0.98333333]
|
|
|
|
mean value: 0.9756989247311828
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [1. 0.93939394 0.96774194 0.90909091 1. 0.96774194
|
|
0.875 0.93548387 0.96875 0.96666667]
|
|
|
|
mean value: 0.9529869257086999
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.09
|
|
|
|
Accuracy on Blind test: 0.2
|
|
|
|
Model_name: Bagging Classifier
|
|
Model func: BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
BaggingClassifier(n_jobs=10, oob_score=True,
|
|
random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.07209158 0.07015896 0.07557607 0.07624006 0.07895207 0.08325553
|
|
0.08903265 0.07233906 0.08569217 0.08808112]
|
|
|
|
mean value: 0.07914192676544189
|
|
|
|
key: score_time
|
|
value: [0.02525473 0.02876973 0.03141665 0.02610064 0.03126097 0.03248501
|
|
0.04130626 0.03619337 0.0269568 0.04313469]
|
|
|
|
mean value: 0.032287883758544925
|
|
|
|
key: test_mcc
|
|
value: [1. 0.90369611 1. 0.93548387 0.96824584 0.90748521
|
|
0.96824584 0.96824584 1. 0.87613871]
|
|
|
|
mean value: 0.9527541422538592
|
|
|
|
key: train_mcc
|
|
value: [0.99283145 0.99283145 0.99640932 0.99640932 0.98207157 0.98921503
|
|
0.99640932 0.99640932 0.99284416 0.99284434]
|
|
|
|
mean value: 0.9928275300190849
|
|
|
|
key: test_accuracy
|
|
value: [1. 0.9516129 1. 0.96774194 0.98387097 0.9516129
|
|
0.98387097 0.98387097 1. 0.93442623]
|
|
|
|
mean value: 0.9757006874669487
|
|
|
|
key: train_accuracy
|
|
value: [0.99640288 0.99640288 0.99820144 0.99820144 0.99100719 0.99460432
|
|
0.99820144 0.99820144 0.99640934 0.99640934]
|
|
|
|
mean value: 0.9964041693036954
|
|
|
|
key: test_fscore
|
|
value: [1. 0.95081967 1. 0.96774194 0.98360656 0.94915254
|
|
0.98412698 0.98360656 1. 0.92857143]
|
|
|
|
mean value: 0.9747625677440411
|
|
|
|
key: train_fscore
|
|
value: [0.99638989 0.99638989 0.9981982 0.9981982 0.99095841 0.99459459
|
|
0.99820467 0.9981982 0.99638989 0.99640288]
|
|
|
|
mean value: 0.9963924818520766
|
|
|
|
key: test_precision
|
|
value: [1. 0.96666667 1. 0.96774194 1. 1.
|
|
0.96875 1. 1. 1. ]
|
|
|
|
mean value: 0.9903158602150538
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 0.99636364 0.99638989
|
|
0.99641577 1. 1. 1. ]
|
|
|
|
mean value: 0.9989169298669707
|
|
|
|
key: test_recall
|
|
value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
[1. 0.93548387 1. 0.96774194 0.96774194 0.90322581
|
|
1. 0.96774194 1. 0.86666667]
|
|
|
|
mean value: 0.9608602150537634
|
|
|
|
key: train_recall
|
|
value: [0.99280576 0.99280576 0.99640288 0.99640288 0.98561151 0.99280576
|
|
1. 0.99640288 0.99280576 0.99283154]
|
|
|
|
mean value: 0.9938874706686264
|
|
|
|
key: test_roc_auc
|
|
value: [1. 0.9516129 1. 0.96774194 0.98387097 0.9516129
|
|
0.98387097 0.98387097 1. 0.93333333]
|
|
|
|
mean value: 0.9755913978494624
|
|
|
|
key: train_roc_auc
|
|
value: [0.99640288 0.99640288 0.99820144 0.99820144 0.99100719 0.99460432
|
|
0.99820144 0.99820144 0.99640288 0.99641577]
|
|
|
|
mean value: 0.9964041669889895
|
|
|
|
key: test_jcc
|
|
value: [1. 0.90625 1. 0.9375 0.96774194 0.90322581
|
|
0.96875 0.96774194 1. 0.86666667]
|
|
|
|
mean value: 0.9517876344086021
|
|
|
|
key: train_jcc
|
|
value: [0.99280576 0.99280576 0.99640288 0.99640288 0.98207885 0.98924731
|
|
0.99641577 0.99640288 0.99280576 0.99283154]
|
|
|
|
mean value: 0.9928199375983084
|
|
|
|
MCC on Blind test: 0.02
|
|
|
|
Accuracy on Blind test: 0.21
|
|
|
|
Model_name: Gaussian Process
|
|
Model func: GaussianProcessClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianProcessClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.22249794 0.21834207 0.20632315 0.21552396 0.18451118 0.13607502
|
|
0.16399002 0.21262717 0.26152039 0.24413514]
|
|
|
|
mean value: 0.20655460357666017
|
|
|
|
key: score_time
|
|
value: [0.02721357 0.02944684 0.02703238 0.02707863 0.01634979 0.01623416
|
|
0.01634526 0.03195477 0.03438377 0.0317266 ]
|
|
|
|
mean value: 0.02577657699584961
|
|
|
|
key: test_mcc
|
|
value: [0.80813523 0.64549722 0.87096774 0.87096774 0.78446454 0.78446454
|
|
0.71004695 0.74348441 0.67858574 0.84710837]
|
|
|
|
mean value: 0.7743722483793141
|
|
|
|
key: train_mcc
|
|
value: [0.96402878 0.95683453 0.96763216 0.97124816 0.96405373 0.97124816
|
|
0.97124816 0.97841727 0.96769036 0.97127459]
|
|
|
|
mean value: 0.9683675887070088
|
|
|
|
key: test_accuracy
|
|
value: [0.90322581 0.82258065 0.93548387 0.93548387 0.88709677 0.88709677
|
|
0.85483871 0.87096774 0.83606557 0.91803279]
|
|
|
|
mean value: 0.8850872554204124
|
|
|
|
key: train_accuracy
|
|
value: [0.98201439 0.97841727 0.98381295 0.98561151 0.98201439 0.98561151
|
|
0.98561151 0.98920863 0.98384201 0.98563734]
|
|
|
|
mean value: 0.9841781511953812
|
|
|
|
key: test_fscore
|
|
value: [0.9 0.82539683 0.93548387 0.93548387 0.87719298 0.87719298
|
|
0.85714286 0.86666667 0.82758621 0.90909091]
|
|
|
|
mean value: 0.8811237172041574
|
|
|
|
key: train_fscore
|
|
value: [0.98201439 0.97841727 0.98384201 0.98555957 0.98194946 0.98555957
|
|
0.98555957 0.98920863 0.98384201 0.98566308]
|
|
|
|
mean value: 0.9841615550595811
|
|
|
|
key: test_precision
|
|
value: [0.93103448 0.8125 0.93548387 0.93548387 0.96153846 0.96153846
|
|
0.84375 0.89655172 0.88888889 1. ]
|
|
|
|
mean value: 0.9166769760797847
|
|
|
|
key: train_precision
|
|
value: [0.98201439 0.97841727 0.98207885 0.98913043 0.98550725 0.98913043
|
|
0.98913043 0.98920863 0.98207885 0.98566308]
|
|
|
|
mean value: 0.9852359627024888
|
|
|
|
key: test_recall
|
|
value: [0.87096774 0.83870968 0.93548387 0.93548387 0.80645161 0.80645161
|
|
0.87096774 0.83870968 0.77419355 0.83333333]
|
|
|
|
mean value: 0.8510752688172043
|
|
|
|
key: train_recall
|
|
value: [0.98201439 0.97841727 0.98561151 0.98201439 0.97841727 0.98201439
|
|
0.98201439 0.98920863 0.98561151 0.98566308]
|
|
|
|
mean value: 0.983098682344447
|
|
|
|
key: test_roc_auc
|
|
value: [0.90322581 0.82258065 0.93548387 0.93548387 0.88709677 0.88709677
|
|
0.85483871 0.87096774 0.83709677 0.91666667]
|
|
|
|
mean value: 0.8850537634408603
|
|
|
|
key: train_roc_auc
|
|
value: [0.98201439 0.97841727 0.98381295 0.98561151 0.98201439 0.98561151
|
|
0.98561151 0.98920863 0.98384518 0.9856373 ]
|
|
|
|
mean value: 0.9841784636806684
|
|
|
|
key: test_jcc
|
|
value: [0.81818182 0.7027027 0.87878788 0.87878788 0.78125 0.78125
|
|
0.75 0.76470588 0.70588235 0.83333333]
|
|
|
|
mean value: 0.7894881847087729
|
|
|
|
key: train_jcc
|
|
value: [0.96466431 0.95774648 0.96819788 0.97153025 0.96453901 0.97153025
|
|
0.97153025 0.97864769 0.96819788 0.97173145]
|
|
|
|
mean value: 0.9688315439563768
|
|
|
|
MCC on Blind test: 0.19
|
|
|
|
Accuracy on Blind test: 0.49
|
|
|
|
Model_name: Gradient Boosting
|
|
Model func: GradientBoostingClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GradientBoostingClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.76334548 0.75004768 0.75521564 0.75695348 0.74053025 0.74805856
|
|
0.72955751 0.7617662 0.77704668 0.75503945]
|
|
|
|
mean value: 0.7537560939788819
|
|
|
|
key: score_time
|
|
value: [0.0095737 0.00933933 0.00931716 0.00973773 0.00978923 0.00931406
|
|
0.00945735 0.01039243 0.01002336 0.00945759]
|
|
|
|
mean value: 0.009640192985534668
|
|
|
|
key: test_mcc
|
|
value: [1. 0.90369611 1. 0.90369611 1. 0.93743687
|
|
1. 0.96824584 0.96770777 0.90586325]
|
|
|
|
mean value: 0.9586645958228902
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [1. 0.9516129 1. 0.9516129 1. 0.96774194
|
|
1. 0.98387097 0.98360656 0.95081967]
|
|
|
|
mean value: 0.9789264939185616
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [1. 0.95238095 1. 0.95238095 1. 0.96666667
|
|
1. 0.98360656 0.98412698 0.94736842]
|
|
|
|
mean value: 0.9786530533985236
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 0.9375 1. 0.9375 1. 1. 1. 1. 0.96875
|
|
1. ]
|
|
|
|
mean value: 0.984375
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 0.96774194 1. 0.96774194 1. 0.93548387
|
|
1. 0.96774194 1. 0.9 ]
|
|
|
|
mean value: 0.9738709677419355
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [1. 0.9516129 1. 0.9516129 1. 0.96774194
|
|
1. 0.98387097 0.98333333 0.95 ]
|
|
|
|
mean value: 0.9788172043010753
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [1. 0.90909091 1. 0.90909091 1. 0.93548387
|
|
1. 0.96774194 0.96875 0.9 ]
|
|
|
|
mean value: 0.9590157624633431
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.08
|
|
|
|
Accuracy on Blind test: 0.2
|
|
|
|
Model_name: QDA
|
|
Model func: QuadraticDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', QuadraticDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.03594589 0.03572512 0.03616953 0.03602529 0.03524661 0.03499389
|
|
0.03544331 0.03508949 0.03510737 0.0351696 ]
|
|
|
|
mean value: 0.03549160957336426
|
|
|
|
key: score_time
|
|
value: [0.01282716 0.0128293 0.01287484 0.01278615 0.01273584 0.01271701
|
|
0.01275468 0.01284575 0.01267576 0.01275253]
|
|
|
|
mean value: 0.012779903411865235
|
|
|
|
key: test_mcc
|
|
value: [0.55205245 0.49319696 0.61807005 0.57935845 0.74161985 0.61807005
|
|
0.54953196 0.54953196 0.74352218 0.47526882]
|
|
|
|
mean value: 0.5920222715118862
|
|
|
|
key: train_mcc
|
|
value: [0.7987718 0.83577199 0.79337932 0.7147514 0.73496 0.84543222
|
|
0.86422693 0.83249324 0.86463537 0.80414275]
|
|
|
|
mean value: 0.8088565031567613
|
|
|
|
key: test_accuracy
|
|
value: [0.75806452 0.74193548 0.80645161 0.77419355 0.85483871 0.80645161
|
|
0.77419355 0.77419355 0.86885246 0.73770492]
|
|
|
|
mean value: 0.7896879957694342
|
|
|
|
key: train_accuracy
|
|
value: [0.89028777 0.9118705 0.89388489 0.8381295 0.85071942 0.92086331
|
|
0.92805755 0.91546763 0.93177738 0.89766607]
|
|
|
|
mean value: 0.897872402257727
|
|
|
|
key: test_fscore
|
|
value: [0.70588235 0.71428571 0.81818182 0.73076923 0.83018868 0.79310345
|
|
0.76666667 0.76666667 0.87878788 0.73333333]
|
|
|
|
mean value: 0.7737865789153631
|
|
|
|
key: train_fscore
|
|
value: [0.87726358 0.90373281 0.89983022 0.80686695 0.82452431 0.91698113
|
|
0.92277992 0.91280148 0.9298893 0.88974855]
|
|
|
|
mean value: 0.8884418264619824
|
|
|
|
key: test_precision
|
|
value: [0.9 0.8 0.77142857 0.9047619 1. 0.85185185
|
|
0.79310345 0.79310345 0.82857143 0.73333333]
|
|
|
|
mean value: 0.8376153986498814
|
|
|
|
key: train_precision
|
|
value: [0.99543379 0.995671 0.85209003 1. 1. 0.96428571
|
|
0.99583333 0.94252874 0.95454545 0.96638655]
|
|
|
|
mean value: 0.9666774610198209
|
|
|
|
key: test_recall
|
|
value: [0.58064516 0.64516129 0.87096774 0.61290323 0.70967742 0.74193548
|
|
0.74193548 0.74193548 0.93548387 0.73333333]
|
|
|
|
mean value: 0.7313978494623656
|
|
|
|
key: train_recall
|
|
value: [0.78417266 0.82733813 0.95323741 0.67625899 0.70143885 0.87410072
|
|
0.85971223 0.88489209 0.90647482 0.82437276]
|
|
|
|
mean value: 0.8291998659137206
|
|
|
|
key: test_roc_auc
|
|
value: [0.75806452 0.74193548 0.80645161 0.77419355 0.85483871 0.80645161
|
|
0.77419355 0.77419355 0.86774194 0.73763441]
|
|
|
|
mean value: 0.7895698924731183
|
|
|
|
key: train_roc_auc
|
|
value: [0.89028777 0.9118705 0.89388489 0.8381295 0.85071942 0.92086331
|
|
0.92805755 0.91546763 0.93173203 0.89779789]
|
|
|
|
mean value: 0.8978810499987108
|
|
|
|
key: test_jcc
|
|
value: [0.54545455 0.55555556 0.69230769 0.57575758 0.70967742 0.65714286
|
|
0.62162162 0.62162162 0.78378378 0.57894737]
|
|
|
|
mean value: 0.6341870041021145
|
|
|
|
key: train_jcc
|
|
value: [0.78136201 0.82437276 0.81790123 0.67625899 0.70143885 0.8466899
|
|
0.85663082 0.83959044 0.86896552 0.80139373]
|
|
|
|
mean value: 0.8014604252313136
|
|
|
|
MCC on Blind test: 0.03
|
|
|
|
Accuracy on Blind test: 0.46
|
|
|
|
Model_name: Ridge Classifier
|
|
Model func: RidgeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01635957 0.01777172 0.03165007 0.04656363 0.04724741 0.0389545
|
|
0.03878331 0.03752255 0.03754044 0.04346228]
|
|
|
|
mean value: 0.035585546493530275
|
|
|
|
key: score_time
|
|
value: [0.01368904 0.0122335 0.01877952 0.02584672 0.03397083 0.0331285
|
|
0.0305109 0.02508545 0.02114463 0.02042747]
|
|
|
|
mean value: 0.02348165512084961
|
|
|
|
key: test_mcc
|
|
value: [0.90369611 0.87278605 0.90369611 0.84983659 0.93743687 0.93548387
|
|
0.90369611 0.87278605 0.9344086 0.83655914]
|
|
|
|
mean value: 0.8950385503763444
|
|
|
|
key: train_mcc
|
|
value: [0.94634322 0.9393413 0.93958474 0.95693359 0.93238486 0.95339163
|
|
0.93585746 0.95025527 0.94663736 0.94994909]
|
|
|
|
mean value: 0.945067852011007
|
|
|
|
key: test_accuracy
|
|
value: [0.9516129 0.93548387 0.9516129 0.91935484 0.96774194 0.96774194
|
|
0.9516129 0.93548387 0.96721311 0.91803279]
|
|
|
|
mean value: 0.9465891062929667
|
|
|
|
key: train_accuracy
|
|
value: [0.97302158 0.96942446 0.96942446 0.97841727 0.96582734 0.97661871
|
|
0.9676259 0.97482014 0.97307002 0.97486535]
|
|
|
|
mean value: 0.9723115224158196
|
|
|
|
key: test_fscore
|
|
value: [0.95238095 0.9375 0.95238095 0.92537313 0.96875 0.96774194
|
|
0.95081967 0.93333333 0.96774194 0.91803279]
|
|
|
|
mean value: 0.9474054702407732
|
|
|
|
key: train_fscore /home/tanu/git/LSHTM_analysis/scripts/ml/./katg_orig.py:155: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
ros_CT.sort_values(by = ['test_mcc'], ascending = False, inplace = True)
|
|
/home/tanu/git/LSHTM_analysis/scripts/ml/./katg_orig.py:158: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
ros_BT.sort_values(by = ['bts_mcc'], ascending = False, inplace = True)
|
|
|
|
value: [0.97335702 0.9699115 0.97001764 0.97857143 0.9664903 0.97682709
|
|
0.96819788 0.97526502 0.97345133 0.9751773 ]
|
|
|
|
mean value: 0.9727266509888757
|
|
|
|
key: test_precision
|
|
value: [0.9375 0.90909091 0.9375 0.86111111 0.93939394 0.96774194
|
|
0.96666667 0.96551724 0.96774194 0.90322581]
|
|
|
|
mean value: 0.9355489545061292
|
|
|
|
key: train_precision
|
|
value: [0.96140351 0.95470383 0.95155709 0.97163121 0.94809689 0.96819788
|
|
0.95138889 0.95833333 0.95818815 0.96491228]
|
|
|
|
mean value: 0.9588413062529795
|
|
|
|
key: test_recall
|
|
value: [0.96774194 0.96774194 0.96774194 1. 1. 0.96774194
|
|
0.93548387 0.90322581 0.96774194 0.93333333]
|
|
|
|
mean value: 0.9610752688172043
|
|
|
|
key: train_recall
|
|
value: [0.98561151 0.98561151 0.98920863 0.98561151 0.98561151 0.98561151
|
|
0.98561151 0.99280576 0.98920863 0.98566308]
|
|
|
|
mean value: 0.9870555168768211
|
|
|
|
key: test_roc_auc
|
|
value: [0.9516129 0.93548387 0.9516129 0.91935484 0.96774194 0.96774194
|
|
0.9516129 0.93548387 0.9672043 0.91827957]
|
|
|
|
mean value: 0.9466129032258065
|
|
|
|
key: train_roc_auc
|
|
value: [0.97302158 0.96942446 0.96942446 0.97841727 0.96582734 0.97661871
|
|
0.9676259 0.97482014 0.97309894 0.97484593]
|
|
|
|
mean value: 0.9723124726025631
|
|
|
|
key: test_jcc
|
|
value: [0.90909091 0.88235294 0.90909091 0.86111111 0.93939394 0.9375
|
|
0.90625 0.875 0.9375 0.84848485]
|
|
|
|
mean value: 0.9005774658348188
|
|
|
|
key: train_jcc
|
|
value: [0.94809689 0.94158076 0.94178082 0.95804196 0.93515358 0.95470383
|
|
0.93835616 0.95172414 0.94827586 0.95155709]
|
|
|
|
mean value: 0.9469271095966189
|
|
|
|
MCC on Blind test: 0.14
|
|
|
|
Accuracy on Blind test: 0.45
|
|
|
|
Model_name: Ridge ClassifierCV
|
|
Model func: RidgeClassifierCV(cv=10)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifierCV(cv=10))])
|
|
|
|
key: fit_time
|
|
value: [0.21475935 0.18135929 0.22929859 0.29797983 0.30646324 0.30647278
|
|
0.35200739 0.39313698 0.30069876 0.32392144]
|
|
|
|
mean value: 0.2906097650527954
|
|
|
|
key: score_time
|
|
value: [0.01228642 0.01898122 0.01878119 0.01901627 0.0246706 0.01887894
|
|
0.01930165 0.01910377 0.02126479 0.01894569]
|
|
|
|
mean value: 0.019123053550720213
|
|
|
|
key: test_mcc
|
|
value: [0.83914639 0.87278605 0.96824584 0.84983659 0.96824584 0.90369611
|
|
0.90369611 0.84266484 0.9344086 0.83655914]
|
|
|
|
mean value: 0.8919285509341773
|
|
|
|
key: train_mcc
|
|
value: [0.95003374 0.96412858 0.94283651 0.95693359 0.94283651 0.96048758
|
|
0.93585746 0.95723096 0.96419362 0.94994909]
|
|
|
|
mean value: 0.9524487657494496
|
|
|
|
key: test_accuracy
|
|
value: [0.91935484 0.93548387 0.98387097 0.91935484 0.98387097 0.9516129
|
|
0.9516129 0.91935484 0.96721311 0.91803279]
|
|
|
|
mean value: 0.9449762030671602
|
|
|
|
key: train_accuracy
|
|
value: [0.97482014 0.98201439 0.97122302 0.97841727 0.97122302 0.98021583
|
|
0.9676259 0.97841727 0.98204668 0.97486535]
|
|
|
|
mean value: 0.9760868863257688
|
|
|
|
key: test_fscore
|
|
value: [0.92063492 0.9375 0.98412698 0.92537313 0.98360656 0.95081967
|
|
0.95081967 0.91525424 0.96774194 0.91803279]
|
|
|
|
mean value: 0.945390990038686
|
|
|
|
key: train_fscore
|
|
value: [0.9751773 0.98214286 0.97163121 0.97857143 0.97163121 0.980322
|
|
0.96819788 0.9787234 0.98214286 0.9751773 ]
|
|
|
|
mean value: 0.9763717451825532
|
|
|
|
key: test_precision
|
|
value: [0.90625 0.90909091 0.96875 0.86111111 1. 0.96666667
|
|
0.96666667 0.96428571 0.96774194 0.90322581]
|
|
|
|
mean value: 0.9413788809756551
|
|
|
|
key: train_precision
|
|
value: [0.96153846 0.9751773 0.95804196 0.97163121 0.95804196 0.97508897
|
|
0.95138889 0.96503497 0.9751773 0.96491228]
|
|
|
|
mean value: 0.9656033295822353
|
|
|
|
key: test_recall
|
|
value: [0.93548387 0.96774194 1. 1. 0.96774194 0.93548387
|
|
0.93548387 0.87096774 0.96774194 0.93333333]
|
|
|
|
mean value: 0.9513978494623656
|
|
|
|
key: train_recall
|
|
value: [0.98920863 0.98920863 0.98561151 0.98561151 0.98561151 0.98561151
|
|
0.98561151 0.99280576 0.98920863 0.98566308]
|
|
|
|
mean value: 0.9874152291070369
|
|
|
|
key: test_roc_auc
|
|
value: [0.91935484 0.93548387 0.98387097 0.91935484 0.98387097 0.9516129
|
|
0.9516129 0.91935484 0.9672043 0.91827957]
|
|
|
|
mean value: 0.9450000000000001
|
|
|
|
key: train_roc_auc
|
|
value: [0.97482014 0.98201439 0.97122302 0.97841727 0.97122302 0.98021583
|
|
0.9676259 0.97841727 0.98205951 0.97484593]
|
|
|
|
mean value: 0.97608622779196
|
|
|
|
key: test_jcc
|
|
value: [0.85294118 0.88235294 0.96875 0.86111111 0.96774194 0.90625
|
|
0.90625 0.84375 0.9375 0.84848485]
|
|
|
|
mean value: 0.897513201272689
|
|
|
|
key: train_jcc
|
|
value: [0.95155709 0.96491228 0.94482759 0.95804196 0.94482759 0.96140351
|
|
0.93835616 0.95833333 0.96491228 0.95155709]
|
|
|
|
mean value: 0.9538728885199296
|
|
|
|
MCC on Blind test: 0.14
|
|
|
|
Accuracy on Blind test: 0.41
|
|
|
|
Model_name: Logistic Regression
|
|
Model func: LogisticRegression(random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegression(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.03412819 0.0332408 0.03413343 0.03263998 0.03388739 0.03435946
|
|
0.02902317 0.03443003 0.03482413 0.03413296]
|
|
|
|
mean value: 0.033479952812194826
|
|
|
|
key: score_time
|
|
value: [0.01193094 0.01195025 0.01777911 0.01195312 0.01445174 0.01460767
|
|
0.01191902 0.01462698 0.01480126 0.01203799]
|
|
|
|
mean value: 0.013605809211730957
|
|
|
|
key: test_mcc
|
|
value: [0.75592895 0.75592895 0.81409158 0.82717019 0.81409158 0.81409158
|
|
0.80833333 0.9372467 0.68826048 0.6778302 ]
|
|
|
|
mean value: 0.7892973529226506
|
|
|
|
key: train_mcc
|
|
value: [0.86725157 0.8612933 0.84641474 0.89492115 0.86052165 0.86794223
|
|
0.84766497 0.84766497 0.86842762 0.87508713]
|
|
|
|
mean value: 0.8637189322784831
|
|
|
|
key: test_accuracy
|
|
value: [0.875 0.875 0.90625 0.90625 0.90625 0.90625
|
|
0.90322581 0.96774194 0.83870968 0.83870968]
|
|
|
|
mean value: 0.8923387096774194
|
|
|
|
key: train_accuracy
|
|
value: [0.93309859 0.92957746 0.92253521 0.9471831 0.92957746 0.93309859
|
|
0.92280702 0.92280702 0.93333333 0.93684211]
|
|
|
|
mean value: 0.9310859896219421
|
|
|
|
key: test_fscore
|
|
value: [0.88235294 0.88235294 0.90909091 0.91428571 0.90909091 0.90909091
|
|
0.90322581 0.96551724 0.85714286 0.84848485]
|
|
|
|
mean value: 0.8980635077370012
|
|
|
|
key: train_fscore
|
|
value: [0.9347079 0.93197279 0.92465753 0.94809689 0.93150685 0.93515358
|
|
0.92567568 0.92567568 0.93515358 0.93835616]
|
|
|
|
mean value: 0.9330956645240915
|
|
|
|
key: test_precision
|
|
value: [0.83333333 0.83333333 0.88235294 0.84210526 0.88235294 0.88235294
|
|
0.875 1. 0.78947368 0.82352941]
|
|
|
|
mean value: 0.8643833849329206
|
|
|
|
key: train_precision
|
|
value: [0.91275168 0.90131579 0.9 0.93197279 0.90666667 0.90728477
|
|
0.89542484 0.89542484 0.90728477 0.91333333]
|
|
|
|
mean value: 0.9071459466068135
|
|
|
|
key: test_recall
|
|
value: [0.9375 0.9375 0.9375 1. 0.9375 0.9375
|
|
0.93333333 0.93333333 0.9375 0.875 ]
|
|
|
|
mean value: 0.9366666666666666
|
|
|
|
key: train_recall
|
|
value: [0.95774648 0.96478873 0.95070423 0.96478873 0.95774648 0.96478873
|
|
0.95804196 0.95804196 0.96478873 0.96478873]
|
|
|
|
mean value: 0.9606224761154338
|
|
|
|
key: test_roc_auc
|
|
value: [0.875 0.875 0.90625 0.90625 0.90625 0.90625
|
|
0.90416667 0.96666667 0.83541667 0.8375 ]
|
|
|
|
mean value: 0.891875
|
|
|
|
key: train_roc_auc
|
|
value: [0.93309859 0.92957746 0.92253521 0.9471831 0.92957746 0.93309859
|
|
0.92268295 0.92268295 0.93344332 0.93693982]
|
|
|
|
mean value: 0.9310819462227913
|
|
|
|
key: test_jcc
|
|
value: [0.78947368 0.78947368 0.83333333 0.84210526 0.83333333 0.83333333
|
|
0.82352941 0.93333333 0.75 0.73684211]
|
|
|
|
mean value: 0.8164757481940145
|
|
|
|
key: train_jcc
|
|
value: [0.87741935 0.87261146 0.85987261 0.90131579 0.87179487 0.87820513
|
|
0.86163522 0.86163522 0.87820513 0.88387097]
|
|
|
|
mean value: 0.8746565756944151
|
|
|
|
MCC on Blind test: 0.2
|
|
|
|
Accuracy on Blind test: 0.54
|
|
|
|
Model_name: Logistic RegressionCV
|
|
Model func: LogisticRegressionCV(random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegressionCV(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.91829085 0.83430195 0.91920638 0.83813977 0.83696795 0.96647
|
|
0.8144691 0.92707682 0.77499366 0.80930591]
|
|
|
|
mean value: 0.8639222383499146
|
|
|
|
key: score_time
|
|
value: [0.01481414 0.01517582 0.01258183 0.0154736 0.01538038 0.01537609
|
|
0.01545954 0.01542616 0.01332378 0.01214576]
|
|
|
|
mean value: 0.01451570987701416
|
|
|
|
key: test_mcc
|
|
value: [0.75592895 0.75592895 0.93933644 0.8819171 0.93933644 0.81409158
|
|
0.74166667 0.82078268 0.74689528 0.74896053]
|
|
|
|
mean value: 0.8144844609786615
|
|
|
|
key: train_mcc
|
|
value: [0.99298237 0.98591549 0.98591549 1. 0.98591549 0.98591549
|
|
1. 0.98596474 0.90211827 1. ]
|
|
|
|
mean value: 0.9824727346759803
|
|
|
|
key: test_accuracy
|
|
value: [0.875 0.875 0.96875 0.9375 0.96875 0.90625
|
|
0.87096774 0.90322581 0.87096774 0.87096774]
|
|
|
|
mean value: 0.9047379032258065
|
|
|
|
key: train_accuracy
|
|
value: [0.99647887 0.99295775 0.99295775 1. 0.99295775 0.99295775
|
|
1. 0.99298246 0.95087719 1. ]
|
|
|
|
mean value: 0.9912169508277736
|
|
|
|
key: test_fscore
|
|
value: [0.88235294 0.88235294 0.96969697 0.94117647 0.96969697 0.90322581
|
|
0.86666667 0.88888889 0.88235294 0.86666667]
|
|
|
|
mean value: 0.9053077262185422
|
|
|
|
key: train_fscore
|
|
value: [0.99649123 0.99295775 0.99295775 1. 0.99295775 0.99295775
|
|
1. 0.99300699 0.95138889 1. ]
|
|
|
|
mean value: 0.9912718095881551
|
|
|
|
key: test_precision
|
|
value: [0.83333333 0.83333333 0.94117647 0.88888889 0.94117647 0.93333333
|
|
0.86666667 1. 0.83333333 0.92857143]
|
|
|
|
mean value: 0.8999813258636788
|
|
|
|
key: train_precision
|
|
value: [0.99300699 0.99295775 0.99295775 1. 0.99295775 0.99295775
|
|
1. 0.99300699 0.93835616 1. ]
|
|
|
|
mean value: 0.9896201136313041
|
|
|
|
key: test_recall
|
|
value: [0.9375 0.9375 1. 1. 1. 0.875
|
|
0.86666667 0.8 0.9375 0.8125 ]
|
|
|
|
mean value: 0.9166666666666666
|
|
|
|
key: train_recall
|
|
value: [1. 0.99295775 0.99295775 1. 0.99295775 0.99295775
|
|
1. 0.99300699 0.96478873 1. ]
|
|
|
|
mean value: 0.9929626711316852
|
|
|
|
key: test_roc_auc
|
|
value: [0.875 0.875 0.96875 0.9375 0.96875 0.90625
|
|
0.87083333 0.9 0.86875 0.87291667]
|
|
|
|
mean value: 0.904375
|
|
|
|
key: train_roc_auc
|
|
value: [0.99647887 0.99295775 0.99295775 1. 0.99295775 0.99295775
|
|
1. 0.99298237 0.95092583 1. ]
|
|
|
|
mean value: 0.9912218063626514
|
|
|
|
key: test_jcc
|
|
value: [0.78947368 0.78947368 0.94117647 0.88888889 0.94117647 0.82352941
|
|
0.76470588 0.8 0.78947368 0.76470588]
|
|
|
|
mean value: 0.8292604059167527
|
|
|
|
key: train_jcc
|
|
value: [0.99300699 0.98601399 0.98601399 1. 0.98601399 0.98601399
|
|
1. 0.98611111 0.90728477 1. ]
|
|
|
|
mean value: 0.9830458816385969
|
|
|
|
MCC on Blind test: 0.1
|
|
|
|
Accuracy on Blind test: 0.43
|
|
|
|
Model_name: Gaussian NB
|
|
Model func: GaussianNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01387405 0.00984526 0.01041555 0.0104239 0.00973701 0.00974059
|
|
0.01031542 0.00996995 0.00954819 0.01026106]
|
|
|
|
mean value: 0.010413098335266113
|
|
|
|
key: score_time
|
|
value: [0.00935388 0.0093286 0.00918818 0.00929189 0.00887823 0.0089097
|
|
0.00947213 0.00943756 0.00906706 0.00956702]
|
|
|
|
mean value: 0.009249424934387207
|
|
|
|
key: test_mcc
|
|
value: [0.57265629 0.44539933 0.67419986 0.31311215 0.37796447 0.69991324
|
|
0.6125 0.5612264 0.54812195 0.48527095]
|
|
|
|
mean value: 0.5290364639771098
|
|
|
|
key: train_mcc
|
|
value: [0.58060405 0.63028696 0.59100561 0.59378186 0.60524671 0.63786488
|
|
0.63536949 0.55859525 0.64298155 0.60682055]
|
|
|
|
mean value: 0.6082556910309478
|
|
|
|
key: test_accuracy
|
|
value: [0.78125 0.71875 0.8125 0.65625 0.6875 0.84375
|
|
0.80645161 0.77419355 0.77419355 0.74193548]
|
|
|
|
mean value: 0.7596774193548387
|
|
|
|
key: train_accuracy
|
|
value: [0.77464789 0.81338028 0.79225352 0.79225352 0.79929577 0.81690141
|
|
0.81754386 0.7754386 0.81754386 0.8 ]
|
|
|
|
mean value: 0.7999258710155671
|
|
|
|
key: test_fscore
|
|
value: [0.8 0.74285714 0.84210526 0.66666667 0.70588235 0.85714286
|
|
0.8 0.78787879 0.78787879 0.76470588]
|
|
|
|
mean value: 0.7755117740876255
|
|
|
|
key: train_fscore
|
|
value: [0.80606061 0.82274247 0.80655738 0.80906149 0.81311475 0.82666667
|
|
0.81560284 0.79354839 0.83006536 0.81311475]
|
|
|
|
mean value: 0.8136534705016032
|
|
|
|
key: test_precision
|
|
value: [0.73684211 0.68421053 0.72727273 0.64705882 0.66666667 0.78947368
|
|
0.8 0.72222222 0.76470588 0.72222222]
|
|
|
|
mean value: 0.7260674860055665
|
|
|
|
key: train_precision
|
|
value: [0.70744681 0.78343949 0.75460123 0.74850299 0.7607362 0.78481013
|
|
0.82733813 0.73652695 0.77439024 0.7607362 ]
|
|
|
|
mean value: 0.763852835868928
|
|
|
|
key: test_recall
|
|
value: [0.875 0.8125 1. 0.6875 0.75 0.9375
|
|
0.8 0.86666667 0.8125 0.8125 ]
|
|
|
|
mean value: 0.8354166666666667
|
|
|
|
key: train_recall
|
|
value: [0.93661972 0.86619718 0.86619718 0.88028169 0.87323944 0.87323944
|
|
0.8041958 0.86013986 0.8943662 0.87323944]
|
|
|
|
mean value: 0.8727715946025805
|
|
|
|
key: test_roc_auc
|
|
value: [0.78125 0.71875 0.8125 0.65625 0.6875 0.84375
|
|
0.80625 0.77708333 0.77291667 0.73958333]
|
|
|
|
mean value: 0.7595833333333334
|
|
|
|
key: train_roc_auc
|
|
value: [0.77464789 0.81338028 0.79225352 0.79225352 0.79929577 0.81690141
|
|
0.81759086 0.77514035 0.81781247 0.80025608]
|
|
|
|
mean value: 0.7999532157982862
|
|
|
|
key: test_jcc
|
|
value: [0.66666667 0.59090909 0.72727273 0.5 0.54545455 0.75
|
|
0.66666667 0.65 0.65 0.61904762]
|
|
|
|
mean value: 0.6366017316017316
|
|
|
|
key: train_jcc
|
|
value: [0.6751269 0.69886364 0.67582418 0.67934783 0.68508287 0.70454545
|
|
0.68862275 0.65775401 0.70949721 0.68508287]
|
|
|
|
mean value: 0.6859747714119993
|
|
|
|
MCC on Blind test: 0.19
|
|
|
|
Accuracy on Blind test: 0.46
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.00973558 0.00966954 0.01009893 0.00950837 0.00984263 0.01027656
|
|
0.00975704 0.01026225 0.01067615 0.01004601]
|
|
|
|
mean value: 0.009987306594848634
|
|
|
|
key: score_time
|
|
value: [0.00911403 0.00956821 0.00901866 0.00881863 0.00951982 0.00912619
|
|
0.00956416 0.00889802 0.00927353 0.00916696]
|
|
|
|
mean value: 0.009206819534301757
|
|
|
|
key: test_mcc
|
|
value: [0.62994079 0.51639778 0.72374686 0.69991324 0.56360186 0.56360186
|
|
0.6125 0.6125 0.54812195 0.48333333]
|
|
|
|
mean value: 0.5953657681593737
|
|
|
|
key: train_mcc
|
|
value: [0.72009768 0.71945253 0.65494582 0.71270053 0.69351968 0.67848335
|
|
0.70693066 0.70025076 0.70041244 0.73046876]
|
|
|
|
mean value: 0.7017262203984271
|
|
|
|
key: test_accuracy
|
|
value: [0.8125 0.75 0.84375 0.84375 0.78125 0.78125
|
|
0.80645161 0.80645161 0.77419355 0.74193548]
|
|
|
|
mean value: 0.7941532258064516
|
|
|
|
key: train_accuracy
|
|
value: [0.85915493 0.85915493 0.82746479 0.8556338 0.84507042 0.83802817
|
|
0.85263158 0.84912281 0.84912281 0.86315789]
|
|
|
|
mean value: 0.8498542129972819
|
|
|
|
key: test_fscore
|
|
value: [0.82352941 0.77777778 0.86486486 0.85714286 0.77419355 0.77419355
|
|
0.8 0.8 0.78787879 0.75 ]
|
|
|
|
mean value: 0.8009580796203187
|
|
|
|
key: train_fscore
|
|
value: [0.86394558 0.8630137 0.82807018 0.86006826 0.85234899 0.84459459
|
|
0.85810811 0.85521886 0.85423729 0.86956522]
|
|
|
|
mean value: 0.8549170768422738
|
|
|
|
key: test_precision
|
|
value: [0.77777778 0.7 0.76190476 0.78947368 0.8 0.8
|
|
0.8 0.8 0.76470588 0.75 ]
|
|
|
|
mean value: 0.7743862106246007
|
|
|
|
key: train_precision
|
|
value: [0.83552632 0.84 0.82517483 0.83443709 0.81410256 0.81168831
|
|
0.83006536 0.82467532 0.82352941 0.82802548]
|
|
|
|
mean value: 0.8267224676472051
|
|
|
|
key: test_recall
|
|
value: [0.875 0.875 1. 0.9375 0.75 0.75 0.8 0.8 0.8125 0.75 ]
|
|
|
|
mean value: 0.835
|
|
|
|
key: train_recall
|
|
value: [0.8943662 0.88732394 0.83098592 0.88732394 0.8943662 0.88028169
|
|
0.88811189 0.88811189 0.88732394 0.91549296]
|
|
|
|
mean value: 0.885368856495617
|
|
|
|
key: test_roc_auc
|
|
value: [0.8125 0.75 0.84375 0.84375 0.78125 0.78125
|
|
0.80625 0.80625 0.77291667 0.74166667]
|
|
|
|
mean value: 0.7939583333333333
|
|
|
|
key: train_roc_auc
|
|
value: [0.85915493 0.85915493 0.82746479 0.8556338 0.84507042 0.83802817
|
|
0.85250665 0.84898552 0.84925638 0.86334088]
|
|
|
|
mean value: 0.8498596473948588
|
|
|
|
key: test_jcc
|
|
value: [0.7 0.63636364 0.76190476 0.75 0.63157895 0.63157895
|
|
0.66666667 0.66666667 0.65 0.6 ]
|
|
|
|
mean value: 0.6694759626338573
|
|
|
|
key: train_jcc
|
|
value: [0.76047904 0.75903614 0.70658683 0.75449102 0.74269006 0.73099415
|
|
0.75147929 0.74705882 0.74556213 0.76923077]
|
|
|
|
mean value: 0.7467608254210698
|
|
|
|
MCC on Blind test: 0.25
|
|
|
|
Accuracy on Blind test: 0.54
|
|
|
|
Model_name: K-Nearest Neighbors
|
|
Model func: KNeighborsClassifier()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', KNeighborsClassifier())])
|
|
|
|
key: fit_time
|
|
value: [0.01051188 0.01002789 0.0098722 0.00999093 0.00889993 0.00897336
|
|
0.00993133 0.01016641 0.00993896 0.00988293]
|
|
|
|
mean value: 0.009819579124450684
|
|
|
|
key: score_time
|
|
value: [0.0114634 0.01155281 0.01165009 0.01547885 0.01105022 0.01161242
|
|
0.0118773 0.01145554 0.01150084 0.01167846]
|
|
|
|
mean value: 0.011931991577148438
|
|
|
|
key: test_mcc
|
|
value: [0.56360186 0.32897585 0.77459667 0.438357 0.438357 0.62994079
|
|
0.50443936 0.28870546 0.225 0.48954403]
|
|
|
|
mean value: 0.46815180211506136
|
|
|
|
key: train_mcc
|
|
value: [0.59207807 0.6479516 0.61342184 0.66916344 0.64445071 0.61452264
|
|
0.61517352 0.64330646 0.60128363 0.61483888]
|
|
|
|
mean value: 0.6256190785735106
|
|
|
|
key: test_accuracy
|
|
value: [0.78125 0.65625 0.875 0.71875 0.71875 0.8125
|
|
0.74193548 0.64516129 0.61290323 0.74193548]
|
|
|
|
mean value: 0.7304435483870968
|
|
|
|
key: train_accuracy
|
|
value: [0.79577465 0.82394366 0.80633803 0.83450704 0.82042254 0.80633803
|
|
0.80701754 0.82105263 0.8 0.80701754]
|
|
|
|
mean value: 0.8122411662960217
|
|
|
|
key: test_fscore
|
|
value: [0.78787879 0.7027027 0.88888889 0.72727273 0.72727273 0.82352941
|
|
0.76470588 0.62068966 0.625 0.73333333]
|
|
|
|
mean value: 0.7401274116639228
|
|
|
|
key: train_fscore
|
|
value: [0.8 0.82517483 0.81099656 0.83623693 0.82943144 0.81355932
|
|
0.81355932 0.82711864 0.80546075 0.81099656]
|
|
|
|
mean value: 0.8172534363236427
|
|
|
|
key: test_precision
|
|
value: [0.76470588 0.61904762 0.8 0.70588235 0.70588235 0.77777778
|
|
0.68421053 0.64285714 0.625 0.78571429]
|
|
|
|
mean value: 0.711107793994791
|
|
|
|
key: train_precision
|
|
value: [0.78378378 0.81944444 0.79194631 0.82758621 0.78980892 0.78431373
|
|
0.78947368 0.80263158 0.78145695 0.79194631]
|
|
|
|
mean value: 0.7962391912062372
|
|
|
|
key: test_recall
|
|
value: [0.8125 0.8125 1. 0.75 0.75 0.875
|
|
0.86666667 0.6 0.625 0.6875 ]
|
|
|
|
mean value: 0.7779166666666667
|
|
|
|
key: train_recall
|
|
value: [0.81690141 0.83098592 0.83098592 0.84507042 0.87323944 0.84507042
|
|
0.83916084 0.85314685 0.83098592 0.83098592]
|
|
|
|
mean value: 0.8396533044420368
|
|
|
|
key: test_roc_auc
|
|
value: [0.78125 0.65625 0.875 0.71875 0.71875 0.8125
|
|
0.74583333 0.64375 0.6125 0.74375 ]
|
|
|
|
mean value: 0.7308333333333333
|
|
|
|
key: train_roc_auc
|
|
value: [0.79577465 0.82394366 0.80633803 0.83450704 0.82042254 0.80633803
|
|
0.80690436 0.82093962 0.80010834 0.80710135]
|
|
|
|
mean value: 0.8122377622377622
|
|
|
|
key: test_jcc
|
|
value: [0.65 0.54166667 0.8 0.57142857 0.57142857 0.7
|
|
0.61904762 0.45 0.45454545 0.57894737]
|
|
|
|
mean value: 0.5937064251537936
|
|
|
|
key: train_jcc
|
|
value: [0.66666667 0.70238095 0.68208092 0.71856287 0.70857143 0.68571429
|
|
0.68571429 0.70520231 0.67428571 0.68208092]
|
|
|
|
mean value: 0.6911260369434541
|
|
|
|
MCC on Blind test: 0.15
|
|
|
|
Accuracy on Blind test: 0.54
|
|
|
|
Model_name: SVM
|
|
Model func: SVC(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SVC(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.0170126 0.0137372 0.01393175 0.01377177 0.01376629 0.01400828
|
|
0.01391697 0.01411223 0.01390743 0.01402569]
|
|
|
|
mean value: 0.014219021797180176
|
|
|
|
key: score_time
|
|
value: [0.01044726 0.01004076 0.00990629 0.00992775 0.01005983 0.01006055
|
|
0.01024079 0.00993419 0.00997138 0.01006269]
|
|
|
|
mean value: 0.010065150260925294
|
|
|
|
key: test_mcc
|
|
value: [0.75592895 0.64549722 0.8819171 0.64549722 0.625 0.69991324
|
|
0.61925228 0.87083333 0.55573827 0.74689528]
|
|
|
|
mean value: 0.704647291079871
|
|
|
|
key: train_mcc
|
|
value: [0.8015394 0.8015394 0.8015394 0.82090085 0.79514657 0.79667392
|
|
0.80213695 0.78160256 0.82281252 0.78329205]
|
|
|
|
mean value: 0.8007183608777234
|
|
|
|
key: test_accuracy
|
|
value: [0.875 0.8125 0.9375 0.8125 0.8125 0.84375
|
|
0.80645161 0.93548387 0.77419355 0.87096774]
|
|
|
|
mean value: 0.8480846774193548
|
|
|
|
key: train_accuracy
|
|
value: [0.89788732 0.89788732 0.89788732 0.9084507 0.8943662 0.8943662
|
|
0.89824561 0.8877193 0.90877193 0.8877193 ]
|
|
|
|
mean value: 0.8973301210773412
|
|
|
|
key: test_fscore
|
|
value: [0.88235294 0.83333333 0.94117647 0.83333333 0.8125 0.85714286
|
|
0.8125 0.93333333 0.8 0.88235294]
|
|
|
|
mean value: 0.8588025210084034
|
|
|
|
key: train_fscore
|
|
value: [0.90365449 0.90365449 0.90365449 0.91275168 0.90066225 0.90131579
|
|
0.90429043 0.89473684 0.91333333 0.89473684]
|
|
|
|
mean value: 0.9032790620717928
|
|
|
|
key: test_precision
|
|
value: [0.83333333 0.75 0.88888889 0.75 0.8125 0.78947368
|
|
0.76470588 0.93333333 0.73684211 0.83333333]
|
|
|
|
mean value: 0.8092410560715514
|
|
|
|
key: train_precision
|
|
value: [0.85534591 0.85534591 0.85534591 0.87179487 0.85 0.84567901
|
|
0.85625 0.8447205 0.86708861 0.83950617]
|
|
|
|
mean value: 0.854107689731846
|
|
|
|
key: test_recall
|
|
value: [0.9375 0.9375 1. 0.9375 0.8125 0.9375
|
|
0.86666667 0.93333333 0.875 0.9375 ]
|
|
|
|
mean value: 0.9175
|
|
|
|
key: train_recall
|
|
value: [0.95774648 0.95774648 0.95774648 0.95774648 0.95774648 0.96478873
|
|
0.95804196 0.95104895 0.96478873 0.95774648]
|
|
|
|
mean value: 0.9585147247119078
|
|
|
|
key: test_roc_auc
|
|
value: [0.875 0.8125 0.9375 0.8125 0.8125 0.84375
|
|
0.80833333 0.93541667 0.77083333 0.86875 ]
|
|
|
|
mean value: 0.8477083333333334
|
|
|
|
key: train_roc_auc
|
|
value: [0.89788732 0.89788732 0.89788732 0.9084507 0.8943662 0.8943662
|
|
0.89803506 0.88749631 0.90896779 0.88796415]
|
|
|
|
mean value: 0.8973308381759086
|
|
|
|
key: test_jcc
|
|
value: [0.78947368 0.71428571 0.88888889 0.71428571 0.68421053 0.75
|
|
0.68421053 0.875 0.66666667 0.78947368]
|
|
|
|
mean value: 0.7556495405179615
|
|
|
|
key: train_jcc
|
|
value: [0.82424242 0.82424242 0.82424242 0.83950617 0.81927711 0.82035928
|
|
0.8253012 0.80952381 0.8404908 0.80952381]
|
|
|
|
mean value: 0.8236709456850548
|
|
|
|
MCC on Blind test: 0.26
|
|
|
|
Accuracy on Blind test: 0.53
|
|
|
|
Model_name: MLP
|
|
Model func: MLPClassifier(max_iter=500, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MLPClassifier(max_iter=500, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.14150548 1.34942889 1.16798019 1.32275343 1.13871098 1.29670668
|
|
1.15652013 1.27996182 1.19484806 1.22055531]
|
|
|
|
mean value: 1.2268970966339112
|
|
|
|
key: score_time
|
|
value: [0.01491427 0.01574922 0.01575661 0.01515722 0.01519918 0.015342
|
|
0.01536441 0.01537752 0.01538777 0.01550269]
|
|
|
|
mean value: 0.015375089645385743
|
|
|
|
key: test_mcc
|
|
value: [0.625 0.75592895 0.875 0.69991324 0.875 0.75
|
|
0.80833333 0.74689528 0.61608311 0.6778302 ]
|
|
|
|
mean value: 0.742998411791738
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.8125 0.875 0.9375 0.84375 0.9375 0.875
|
|
0.90322581 0.87096774 0.80645161 0.83870968]
|
|
|
|
mean value: 0.8700604838709678
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.8125 0.88235294 0.9375 0.85714286 0.9375 0.875
|
|
0.90322581 0.85714286 0.82352941 0.84848485]
|
|
|
|
mean value: 0.8734378722163352
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.8125 0.83333333 0.9375 0.78947368 0.9375 0.875
|
|
0.875 0.92307692 0.77777778 0.82352941]
|
|
|
|
mean value: 0.8584691130163267
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.8125 0.9375 0.9375 0.9375 0.9375 0.875
|
|
0.93333333 0.8 0.875 0.875 ]
|
|
|
|
mean value: 0.8920833333333333
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.8125 0.875 0.9375 0.84375 0.9375 0.875
|
|
0.90416667 0.86875 0.80416667 0.8375 ]
|
|
|
|
mean value: 0.8695833333333334
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.68421053 0.78947368 0.88235294 0.75 0.88235294 0.77777778
|
|
0.82352941 0.75 0.7 0.73684211]
|
|
|
|
mean value: 0.7776539387684899
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.22
|
|
|
|
Accuracy on Blind test: 0.55
|
|
|
|
Model_name: Decision Tree
|
|
Model func: DecisionTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', DecisionTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02083063 0.01830387 0.01479101 0.01522541 0.01517844 0.01537442
|
|
0.01601005 0.01530504 0.01531887 0.01610589]
|
|
|
|
mean value: 0.01624436378479004
|
|
|
|
key: score_time
|
|
value: [0.01181269 0.0092442 0.00874877 0.00870419 0.0087533 0.00873685
|
|
0.00871205 0.0087297 0.00876212 0.00880837]
|
|
|
|
mean value: 0.009101223945617676
|
|
|
|
key: test_mcc
|
|
value: [0.62994079 0.93933644 0.8819171 1. 1. 0.82717019
|
|
0.80753845 1. 0.74166667 0.74166667]
|
|
|
|
mean value: 0.856923630331575
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.8125 0.96875 0.9375 1. 1. 0.90625
|
|
0.90322581 1. 0.87096774 0.87096774]
|
|
|
|
mean value: 0.927016129032258
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.8 0.96969697 0.94117647 1. 1. 0.89655172
|
|
0.89655172 1. 0.875 0.875 ]
|
|
|
|
mean value: 0.9253976888561067
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.85714286 0.94117647 0.88888889 1. 1. 1.
|
|
0.92857143 1. 0.875 0.875 ]
|
|
|
|
mean value: 0.936577964519141
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.75 1. 1. 1. 1. 0.8125
|
|
0.86666667 1. 0.875 0.875 ]
|
|
|
|
mean value: 0.9179166666666667
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.8125 0.96875 0.9375 1. 1. 0.90625
|
|
0.90208333 1. 0.87083333 0.87083333]
|
|
|
|
mean value: 0.926875
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.66666667 0.94117647 0.88888889 1. 1. 0.8125
|
|
0.8125 1. 0.77777778 0.77777778]
|
|
|
|
mean value: 0.8677287581699347
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.03
|
|
|
|
Accuracy on Blind test: 0.2
|
|
|
|
Model_name: Extra Trees
|
|
Model func: ExtraTreesClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreesClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.10153913 0.10321045 0.10332656 0.10254884 0.10229015 0.10297704
|
|
0.10236096 0.10254765 0.10236812 0.10320854]
|
|
|
|
mean value: 0.10263774394989014
|
|
|
|
key: score_time
|
|
value: [0.01733112 0.0174036 0.0175724 0.0176034 0.01755619 0.01750135
|
|
0.01753855 0.01772189 0.01778698 0.01753831]
|
|
|
|
mean value: 0.017555379867553712
|
|
|
|
key: test_mcc
|
|
value: [0.625 0.69991324 0.8819171 0.68884672 0.62994079 0.81409158
|
|
0.6778302 0.87083333 0.61608311 0.74166667]
|
|
|
|
mean value: 0.7246122745982656
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.8125 0.84375 0.9375 0.84375 0.8125 0.90625
|
|
0.83870968 0.93548387 0.80645161 0.87096774]
|
|
|
|
mean value: 0.8607862903225807
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.8125 0.85714286 0.94117647 0.84848485 0.8 0.90909091
|
|
0.82758621 0.93333333 0.82352941 0.875 ]
|
|
|
|
mean value: 0.8627844037301441
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.8125 0.78947368 0.88888889 0.82352941 0.85714286 0.88235294
|
|
0.85714286 0.93333333 0.77777778 0.875 ]
|
|
|
|
mean value: 0.8497141751437417
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.8125 0.9375 1. 0.875 0.75 0.9375
|
|
0.8 0.93333333 0.875 0.875 ]
|
|
|
|
mean value: 0.8795833333333334
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.8125 0.84375 0.9375 0.84375 0.8125 0.90625
|
|
0.8375 0.93541667 0.80416667 0.87083333]
|
|
|
|
mean value: 0.8604166666666667
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.68421053 0.75 0.88888889 0.73684211 0.66666667 0.83333333
|
|
0.70588235 0.875 0.7 0.77777778]
|
|
|
|
mean value: 0.761860165118679
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.2
|
|
|
|
Accuracy on Blind test: 0.54
|
|
|
|
Model_name: Extra Tree
|
|
Model func: ExtraTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.00992775 0.00947857 0.0094595 0.00942636 0.00943565 0.00957608
|
|
0.00951862 0.00950575 0.00959396 0.00936556]
|
|
|
|
mean value: 0.009528779983520507
|
|
|
|
key: score_time
|
|
value: [0.00903702 0.00869894 0.00865579 0.00867105 0.00870419 0.00873446
|
|
0.0087626 0.00865459 0.00866556 0.00874281]
|
|
|
|
mean value: 0.00873270034790039
|
|
|
|
key: test_mcc
|
|
value: [0.38729833 0.31311215 0.57265629 0.37796447 0.19088543 0.56360186
|
|
0.29844172 0.4184137 0.61925228 0.6125 ]
|
|
|
|
mean value: 0.4354126237712749
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.6875 0.65625 0.78125 0.6875 0.59375 0.78125
|
|
0.64516129 0.70967742 0.80645161 0.80645161]
|
|
|
|
mean value: 0.7155241935483871
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.64285714 0.64516129 0.8 0.66666667 0.55172414 0.78787879
|
|
0.56 0.68965517 0.8 0.8125 ]
|
|
|
|
mean value: 0.6956443198070006
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.75 0.66666667 0.73684211 0.71428571 0.61538462 0.76470588
|
|
0.7 0.71428571 0.85714286 0.8125 ]
|
|
|
|
mean value: 0.7331813555381667
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.5625 0.625 0.875 0.625 0.5 0.8125
|
|
0.46666667 0.66666667 0.75 0.8125 ]
|
|
|
|
mean value: 0.6695833333333333
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.6875 0.65625 0.78125 0.6875 0.59375 0.78125
|
|
0.63958333 0.70833333 0.80833333 0.80625 ]
|
|
|
|
mean value: 0.7150000000000001
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.47368421 0.47619048 0.66666667 0.5 0.38095238 0.65
|
|
0.38888889 0.52631579 0.66666667 0.68421053]
|
|
|
|
mean value: 0.5413575605680869
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.13
|
|
|
|
Accuracy on Blind test: 0.56
|
|
|
|
Model_name: Random Forest
|
|
Model func: RandomForestClassifier(n_estimators=1000, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(n_estimators=1000, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.41357183 1.40730119 1.40797234 1.41579556 1.43474579 1.41381311
|
|
1.42277074 1.42039752 1.40721416 1.4074707 ]
|
|
|
|
mean value: 1.4151052951812744
|
|
|
|
key: score_time
|
|
value: [0.09626412 0.0903163 0.09786963 0.09362054 0.0960052 0.0972116
|
|
0.09172726 0.09013462 0.14322925 0.09644318]
|
|
|
|
mean value: 0.09928216934204101
|
|
|
|
key: test_mcc
|
|
value: [0.75592895 0.8819171 0.93933644 0.81409158 0.81409158 0.93933644
|
|
0.87083333 0.9372467 0.87083333 0.80753845]
|
|
|
|
mean value: 0.8631153893705837
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.875 0.9375 0.96875 0.90625 0.90625 0.96875
|
|
0.93548387 0.96774194 0.93548387 0.90322581]
|
|
|
|
mean value: 0.9304435483870968
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.88235294 0.94117647 0.96969697 0.90909091 0.90909091 0.96969697
|
|
0.93333333 0.96551724 0.9375 0.90909091]
|
|
|
|
mean value: 0.9326546653144017
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.83333333 0.88888889 0.94117647 0.88235294 0.88235294 0.94117647
|
|
0.93333333 1. 0.9375 0.88235294]
|
|
|
|
mean value: 0.9122467320261438
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.9375 1. 1. 0.9375 0.9375 1.
|
|
0.93333333 0.93333333 0.9375 0.9375 ]
|
|
|
|
mean value: 0.9554166666666667
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.875 0.9375 0.96875 0.90625 0.90625 0.96875
|
|
0.93541667 0.96666667 0.93541667 0.90208333]
|
|
|
|
mean value: 0.9302083333333333
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.78947368 0.88888889 0.94117647 0.83333333 0.83333333 0.94117647
|
|
0.875 0.93333333 0.88235294 0.83333333]
|
|
|
|
mean value: 0.875140178878569
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.07
|
|
|
|
Accuracy on Blind test: 0.31
|
|
|
|
Model_name: Random Forest2
|
|
Model func: RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000...05', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10,
|
|
oob_score=True, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.94509768 0.92780328 0.92000961 0.8821938 0.92545223 0.95332265
|
|
0.93400884 0.98241067 0.93518734 0.926929 ]
|
|
|
|
mean value: 0.9332415103912354
|
|
|
|
key: score_time
|
|
value: [0.22051883 0.21201563 0.21300459 0.25284195 0.23016191 0.20851731
|
|
0.26344585 0.20474267 0.2121985 0.26367092]
|
|
|
|
mean value: 0.2281118154525757
|
|
|
|
key: test_mcc
|
|
value: [0.75592895 0.82717019 0.81409158 0.81409158 0.8819171 0.8819171
|
|
0.87083333 0.9372467 0.67916667 0.80753845]
|
|
|
|
mean value: 0.826990164934043
|
|
|
|
key: train_mcc
|
|
value: [0.97221679 0.95129413 0.95812669 0.94450549 0.94403659 0.95129413
|
|
0.95826776 0.95145657 0.95146839 0.9582759 ]
|
|
|
|
mean value: 0.954094243100845
|
|
|
|
key: test_accuracy
|
|
value: [0.875 0.90625 0.90625 0.90625 0.9375 0.9375
|
|
0.93548387 0.96774194 0.83870968 0.90322581]
|
|
|
|
mean value: 0.911391129032258
|
|
|
|
key: train_accuracy
|
|
value: [0.98591549 0.97535211 0.97887324 0.97183099 0.97183099 0.97535211
|
|
0.97894737 0.9754386 0.9754386 0.97894737]
|
|
|
|
mean value: 0.9767926859402026
|
|
|
|
key: test_fscore
|
|
value: [0.88235294 0.91428571 0.90909091 0.90909091 0.94117647 0.94117647
|
|
0.93333333 0.96551724 0.83870968 0.90909091]
|
|
|
|
mean value: 0.9143824576043381
|
|
|
|
key: train_fscore
|
|
value: [0.98611111 0.97577855 0.97916667 0.97241379 0.97222222 0.97577855
|
|
0.97931034 0.97594502 0.97577855 0.97916667]
|
|
|
|
mean value: 0.977167146191824
|
|
|
|
key: test_precision
|
|
value: [0.83333333 0.84210526 0.88235294 0.88235294 0.88888889 0.88888889
|
|
0.93333333 1. 0.86666667 0.88235294]
|
|
|
|
mean value: 0.8900275197798417
|
|
|
|
key: train_precision
|
|
value: [0.97260274 0.95918367 0.96575342 0.9527027 0.95890411 0.95918367
|
|
0.96598639 0.95945946 0.95918367 0.96575342]
|
|
|
|
mean value: 0.9618713275758285
|
|
|
|
key: test_recall
|
|
value: [0.9375 1. 0.9375 0.9375 1. 1.
|
|
0.93333333 0.93333333 0.8125 0.9375 ]
|
|
|
|
mean value: 0.9429166666666666
|
|
|
|
key: train_recall
|
|
value: [1. 0.99295775 0.99295775 0.99295775 0.98591549 0.99295775
|
|
0.99300699 0.99300699 0.99295775 0.99295775]
|
|
|
|
mean value: 0.9929675957844972
|
|
|
|
key: test_roc_auc
|
|
value: [0.875 0.90625 0.90625 0.90625 0.9375 0.9375
|
|
0.93541667 0.96666667 0.83958333 0.90208333]
|
|
|
|
mean value: 0.91125
|
|
|
|
key: train_roc_auc
|
|
value: [0.98591549 0.97535211 0.97887324 0.97183099 0.97183099 0.97535211
|
|
0.97889786 0.97537674 0.97549985 0.97899636]
|
|
|
|
mean value: 0.9767925736235595
|
|
|
|
key: test_jcc
|
|
value: [0.78947368 0.84210526 0.83333333 0.83333333 0.88888889 0.88888889
|
|
0.875 0.93333333 0.72222222 0.83333333]
|
|
|
|
mean value: 0.8439912280701755
|
|
|
|
key: train_jcc
|
|
value: [0.97260274 0.9527027 0.95918367 0.94630872 0.94594595 0.9527027
|
|
0.95945946 0.95302013 0.9527027 0.95918367]
|
|
|
|
mean value: 0.9553812459238719
|
|
|
|
MCC on Blind test: 0.12
|
|
|
|
Accuracy on Blind test: 0.35
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.02524734 0.01086307 0.01085806 0.00962305 0.00966716 0.00964165
|
|
0.01023078 0.00999737 0.01087999 0.01071978]
|
|
|
|
mean value: 0.011772823333740235
|
|
|
|
key: score_time
|
|
value: [0.01101589 0.00973225 0.00983381 0.00891066 0.00889468 0.00895429
|
|
0.00897837 0.00976849 0.00972676 0.00902939]
|
|
|
|
mean value: 0.009484457969665527
|
|
|
|
key: test_mcc
|
|
value: [0.62994079 0.51639778 0.72374686 0.69991324 0.56360186 0.56360186
|
|
0.6125 0.6125 0.54812195 0.48333333]
|
|
|
|
mean value: 0.5953657681593737
|
|
|
|
key: train_mcc
|
|
value: [0.72009768 0.71945253 0.65494582 0.71270053 0.69351968 0.67848335
|
|
0.70693066 0.70025076 0.70041244 0.73046876]
|
|
|
|
mean value: 0.7017262203984271
|
|
|
|
key: test_accuracy
|
|
value: [0.8125 0.75 0.84375 0.84375 0.78125 0.78125
|
|
0.80645161 0.80645161 0.77419355 0.74193548]
|
|
|
|
mean value: 0.7941532258064516
|
|
|
|
key: train_accuracy
|
|
value: [0.85915493 0.85915493 0.82746479 0.8556338 0.84507042 0.83802817
|
|
0.85263158 0.84912281 0.84912281 0.86315789]
|
|
|
|
mean value: 0.8498542129972819
|
|
|
|
key: test_fscore
|
|
value: [0.82352941 0.77777778 0.86486486 0.85714286 0.77419355 0.77419355
|
|
0.8 0.8 0.78787879 0.75 ]
|
|
|
|
mean value: 0.8009580796203187
|
|
|
|
key: train_fscore
|
|
value: [0.86394558 0.8630137 0.82807018 0.86006826 0.85234899 0.84459459
|
|
0.85810811 0.85521886 0.85423729 0.86956522]
|
|
|
|
mean value: 0.8549170768422738
|
|
|
|
key: test_precision
|
|
value: [0.77777778 0.7 0.76190476 0.78947368 0.8 0.8
|
|
0.8 0.8 0.76470588 0.75 ]
|
|
|
|
mean value: 0.7743862106246007
|
|
|
|
key: train_precision
|
|
value: [0.83552632 0.84 0.82517483 0.83443709 0.81410256 0.81168831
|
|
0.83006536 0.82467532 0.82352941 0.82802548]
|
|
|
|
mean value: 0.8267224676472051
|
|
|
|
key: test_recall
|
|
value: [0.875 0.875 1. 0.9375 0.75 0.75 0.8 0.8 0.8125 0.75 ]
|
|
|
|
mean value: 0.835
|
|
|
|
key: train_recall
|
|
value: [0.8943662 0.88732394 0.83098592 0.88732394 0.8943662 0.88028169
|
|
0.88811189 0.88811189 0.88732394 0.91549296]
|
|
|
|
mean value: 0.885368856495617
|
|
|
|
key: test_roc_auc
|
|
value: [0.8125 0.75 0.84375 0.84375 0.78125 0.78125
|
|
0.80625 0.80625 0.77291667 0.74166667]
|
|
|
|
mean value: 0.7939583333333333
|
|
|
|
key: train_roc_auc
|
|
value: [0.85915493 0.85915493 0.82746479 0.8556338 0.84507042 0.83802817
|
|
0.85250665 0.84898552 0.84925638 0.86334088]
|
|
|
|
mean value: 0.8498596473948588
|
|
|
|
key: test_jcc
|
|
value: [0.7 0.63636364 0.76190476 0.75 0.63157895 0.63157895
|
|
0.66666667 0.66666667 0.65 0.6 ]
|
|
|
|
mean value: 0.6694759626338573
|
|
|
|
key: train_jcc
|
|
value: [0.76047904 0.75903614 0.70658683 0.75449102 0.74269006 0.73099415
|
|
0.75147929 0.74705882 0.74556213 0.76923077]
|
|
|
|
mean value: 0.7467608254210698
|
|
|
|
MCC on Blind test: 0.25
|
|
|
|
Accuracy on Blind test: 0.54
|
|
|
|
Model_name: XGBoost
|
|
Model func: XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000...
|
|
interaction_constraints=None, learning_rate=None,
|
|
max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan,
|
|
monotone_constraints=None, n_estimators=100,
|
|
n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None,
|
|
reg_lambda=None, scale_pos_weight=None,
|
|
subsample=None, tree_method=None,
|
|
use_label_encoder=False,
|
|
validate_parameters=None, verbosity=0))])
|
|
|
|
key: fit_time
|
|
value: [0.08318973 0.05189371 0.05080152 0.05236053 0.0516212 0.0818789
|
|
0.05003667 0.05707097 0.05641842 0.13113332]
|
|
|
|
mean value: 0.06664049625396729
|
|
|
|
key: score_time
|
|
value: [0.01104307 0.01070857 0.01106858 0.01062179 0.01056743 0.01061416
|
|
0.01038623 0.01031113 0.01025176 0.01268387]
|
|
|
|
mean value: 0.010825657844543457
|
|
|
|
key: test_mcc
|
|
value: [0.81409158 0.93933644 0.93933644 0.93933644 0.875 1.
|
|
0.87083333 1. 0.9372467 0.87083333]
|
|
|
|
mean value: 0.9186014252766943
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.90625 0.96875 0.96875 0.96875 0.9375 1.
|
|
0.93548387 1. 0.96774194 0.93548387]
|
|
|
|
mean value: 0.9588709677419355
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.90909091 0.96969697 0.96969697 0.96969697 0.9375 1.
|
|
0.93333333 1. 0.96969697 0.9375 ]
|
|
|
|
mean value: 0.9596212121212121
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.88235294 0.94117647 0.94117647 0.94117647 0.9375 1.
|
|
0.93333333 1. 0.94117647 0.9375 ]
|
|
|
|
mean value: 0.9455392156862745
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.9375 1. 1. 1. 0.9375 1.
|
|
0.93333333 1. 1. 0.9375 ]
|
|
|
|
mean value: 0.9745833333333334
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.90625 0.96875 0.96875 0.96875 0.9375 1.
|
|
0.93541667 1. 0.96666667 0.93541667]
|
|
|
|
mean value: 0.95875
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.83333333 0.94117647 0.94117647 0.94117647 0.88235294 1.
|
|
0.875 1. 0.94117647 0.88235294]
|
|
|
|
mean value: 0.9237745098039216
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.03
|
|
|
|
Accuracy on Blind test: 0.2
|
|
|
|
Model_name: LDA
|
|
Model func: LinearDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LinearDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.03818059 0.06525922 0.06525731 0.06771278 0.06377387 0.06696844
|
|
0.06272554 0.05751729 0.05288243 0.0317173 ]
|
|
|
|
mean value: 0.05719947814941406
|
|
|
|
key: score_time
|
|
value: [0.0220902 0.01350856 0.02089882 0.01204014 0.02447152 0.02816701
|
|
0.0265007 0.02375865 0.01212239 0.02076554]
|
|
|
|
mean value: 0.020432353019714355
|
|
|
|
key: test_mcc
|
|
value: [0.81409158 0.81409158 0.93933644 0.72374686 0.93933644 0.93933644
|
|
0.67916667 0.80753845 0.80753845 0.74896053]
|
|
|
|
mean value: 0.8213143427561892
|
|
|
|
key: train_mcc
|
|
value: [0.9860133 0.97192739 0.97183099 0.97192739 0.97889751 0.97192739
|
|
0.97202385 0.96512319 0.9720266 0.9720266 ]
|
|
|
|
mean value: 0.973372421059164
|
|
|
|
key: test_accuracy
|
|
value: [0.90625 0.90625 0.96875 0.84375 0.96875 0.96875
|
|
0.83870968 0.90322581 0.90322581 0.87096774]
|
|
|
|
mean value: 0.9078629032258064
|
|
|
|
key: train_accuracy
|
|
value: [0.99295775 0.98591549 0.98591549 0.98591549 0.98943662 0.98591549
|
|
0.98596491 0.98245614 0.98596491 0.98596491]
|
|
|
|
mean value: 0.9866407215221151
|
|
|
|
key: test_fscore
|
|
value: [0.90909091 0.90909091 0.96969697 0.86486486 0.96969697 0.96969697
|
|
0.83870968 0.89655172 0.90909091 0.86666667]
|
|
|
|
mean value: 0.9103156569452454
|
|
|
|
key: train_fscore
|
|
value: [0.99300699 0.98601399 0.98591549 0.98601399 0.98947368 0.98601399
|
|
0.98611111 0.98269896 0.98601399 0.98601399]
|
|
|
|
mean value: 0.9867276173294023
|
|
|
|
key: test_precision
|
|
value: [0.88235294 0.88235294 0.94117647 0.76190476 0.94117647 0.94117647
|
|
0.8125 0.92857143 0.88235294 0.92857143]
|
|
|
|
mean value: 0.8902135854341736
|
|
|
|
key: train_precision
|
|
value: [0.98611111 0.97916667 0.98591549 0.97916667 0.98601399 0.97916667
|
|
0.97931034 0.97260274 0.97916667 0.97916667]
|
|
|
|
mean value: 0.9805787007969791
|
|
|
|
key: test_recall
|
|
value: [0.9375 0.9375 1. 1. 1. 1.
|
|
0.86666667 0.86666667 0.9375 0.8125 ]
|
|
|
|
mean value: 0.9358333333333333
|
|
|
|
key: train_recall
|
|
value: [1. 0.99295775 0.98591549 0.99295775 0.99295775 0.99295775
|
|
0.99300699 0.99300699 0.99295775 0.99295775]
|
|
|
|
mean value: 0.9929675957844972
|
|
|
|
key: test_roc_auc
|
|
value: [0.90625 0.90625 0.96875 0.84375 0.96875 0.96875
|
|
0.83958333 0.90208333 0.90208333 0.87291667]
|
|
|
|
mean value: 0.9079166666666667
|
|
|
|
key: train_roc_auc
|
|
value: [0.99295775 0.98591549 0.98591549 0.98591549 0.98943662 0.98591549
|
|
0.98594012 0.98241899 0.98598936 0.98598936]
|
|
|
|
mean value: 0.986639416921107
|
|
|
|
key: test_jcc
|
|
value: [0.83333333 0.83333333 0.94117647 0.76190476 0.94117647 0.94117647
|
|
0.72222222 0.8125 0.83333333 0.76470588]
|
|
|
|
mean value: 0.8384862278244631
|
|
|
|
key: train_jcc
|
|
value: [0.98611111 0.97241379 0.97222222 0.97241379 0.97916667 0.97241379
|
|
0.97260274 0.96598639 0.97241379 0.97241379]
|
|
|
|
mean value: 0.9738158099801092
|
|
|
|
MCC on Blind test: 0.16
|
|
|
|
Accuracy on Blind test: 0.47
|
|
|
|
Model_name: Multinomial
|
|
Model func: MultinomialNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MultinomialNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01269746 0.01149964 0.01052713 0.01056075 0.0101912 0.01011467
|
|
0.01012588 0.01005483 0.01007462 0.01022625]
|
|
|
|
mean value: 0.010607242584228516
|
|
|
|
key: score_time
|
|
value: [0.01169109 0.00982738 0.00970292 0.00943661 0.00925064 0.00929761
|
|
0.00929666 0.00924301 0.00927711 0.00933456]
|
|
|
|
mean value: 0.00963575839996338
|
|
|
|
key: test_mcc
|
|
value: [0.75 0.51639778 0.77459667 0.62994079 0.75592895 0.625
|
|
0.48333333 0.61925228 0.6310315 0.61925228]
|
|
|
|
mean value: 0.640473358423418
|
|
|
|
key: train_mcc
|
|
value: [0.7275383 0.69185856 0.66256355 0.70767315 0.68515743 0.70223363
|
|
0.70631586 0.63559314 0.70986095 0.66750412]
|
|
|
|
mean value: 0.6896298694982799
|
|
|
|
key: test_accuracy
|
|
value: [0.875 0.75 0.875 0.8125 0.875 0.8125
|
|
0.74193548 0.80645161 0.80645161 0.80645161]
|
|
|
|
mean value: 0.8161290322580645
|
|
|
|
key: train_accuracy
|
|
value: [0.86267606 0.84507042 0.83098592 0.85211268 0.8415493 0.84859155
|
|
0.85263158 0.81754386 0.85263158 0.83157895]
|
|
|
|
mean value: 0.8435371880405238
|
|
|
|
key: test_fscore
|
|
value: [0.875 0.77777778 0.88888889 0.82352941 0.88235294 0.8125
|
|
0.73333333 0.8125 0.83333333 0.8 ]
|
|
|
|
mean value: 0.823921568627451
|
|
|
|
key: train_fscore
|
|
value: [0.86779661 0.85034014 0.83448276 0.8590604 0.84745763 0.85714286
|
|
0.85714286 0.82191781 0.86 0.84 ]
|
|
|
|
mean value: 0.8495341057152703
|
|
|
|
key: test_precision
|
|
value: [0.875 0.7 0.8 0.77777778 0.83333333 0.8125
|
|
0.73333333 0.76470588 0.75 0.85714286]
|
|
|
|
mean value: 0.7903793183940243
|
|
|
|
key: train_precision
|
|
value: [0.83660131 0.82236842 0.81756757 0.82051282 0.81699346 0.81132075
|
|
0.83443709 0.80536913 0.8164557 0.79746835]
|
|
|
|
mean value: 0.8179094599334236
|
|
|
|
key: test_recall
|
|
value: [0.875 0.875 1. 0.875 0.9375 0.8125
|
|
0.73333333 0.86666667 0.9375 0.75 ]
|
|
|
|
mean value: 0.86625
|
|
|
|
key: train_recall
|
|
value: [0.90140845 0.88028169 0.85211268 0.90140845 0.88028169 0.9084507
|
|
0.88111888 0.83916084 0.9084507 0.88732394]
|
|
|
|
mean value: 0.8839998030138876
|
|
|
|
key: test_roc_auc
|
|
value: [0.875 0.75 0.875 0.8125 0.875 0.8125
|
|
0.74166667 0.80833333 0.80208333 0.80833333]
|
|
|
|
mean value: 0.8160416666666667
|
|
|
|
key: train_roc_auc
|
|
value: [0.86267606 0.84507042 0.83098592 0.85211268 0.8415493 0.84859155
|
|
0.85253127 0.81746774 0.85282675 0.83177386]
|
|
|
|
mean value: 0.8435585541219344
|
|
|
|
key: test_jcc
|
|
value: [0.77777778 0.63636364 0.8 0.7 0.78947368 0.68421053
|
|
0.57894737 0.68421053 0.71428571 0.66666667]
|
|
|
|
mean value: 0.7031935900356953
|
|
|
|
key: train_jcc
|
|
value: [0.76646707 0.73964497 0.71597633 0.75294118 0.73529412 0.75
|
|
0.75 0.69767442 0.75438596 0.72413793]
|
|
|
|
mean value: 0.7386521976312473
|
|
|
|
MCC on Blind test: 0.23
|
|
|
|
Accuracy on Blind test: 0.57
|
|
|
|
Model_name: Passive Aggresive
|
|
Model func: PassiveAggressiveClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
PassiveAggressiveClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01371336 0.01587415 0.01515174 0.02035451 0.01662254 0.02007604
|
|
0.0158751 0.021281 0.01977491 0.01828241]
|
|
|
|
mean value: 0.017700576782226564
|
|
|
|
key: score_time
|
|
value: [0.00933337 0.01098824 0.0109973 0.01164103 0.01162219 0.01165724
|
|
0.01247501 0.01168752 0.01164079 0.01164222]
|
|
|
|
mean value: 0.011368489265441895
|
|
|
|
key: test_mcc
|
|
value: [0.75592895 0.75592895 0.53935989 0.82717019 0.875 0.68884672
|
|
0.74896053 0.60910959 0.71269665 0.6778302 ]
|
|
|
|
mean value: 0.7190831661199489
|
|
|
|
key: train_mcc
|
|
value: [0.94450549 0.89545487 0.68840989 0.93105621 0.92274116 0.97183099
|
|
0.86506676 0.9115139 0.90499493 0.93704438]
|
|
|
|
mean value: 0.8972618583980994
|
|
|
|
key: test_accuracy
|
|
value: [0.875 0.875 0.75 0.90625 0.9375 0.84375
|
|
0.87096774 0.77419355 0.83870968 0.83870968]
|
|
|
|
mean value: 0.8510080645161291
|
|
|
|
key: train_accuracy
|
|
value: [0.97183099 0.9471831 0.82394366 0.96478873 0.96126761 0.98591549
|
|
0.92982456 0.95438596 0.95087719 0.96842105]
|
|
|
|
mean value: 0.9458438349394613
|
|
|
|
key: test_fscore
|
|
value: [0.88235294 0.88235294 0.69230769 0.91428571 0.9375 0.83870968
|
|
0.875 0.69565217 0.86486486 0.84848485]
|
|
|
|
mean value: 0.8431510853628459
|
|
|
|
key: train_fscore
|
|
value: [0.97241379 0.94845361 0.78813559 0.96575342 0.96167247 0.98591549
|
|
0.93377483 0.95272727 0.9527027 0.96797153]
|
|
|
|
mean value: 0.9429520726170258
|
|
|
|
key: test_precision
|
|
value: [0.83333333 0.83333333 0.9 0.84210526 0.9375 0.86666667
|
|
0.82352941 1. 0.76190476 0.82352941]
|
|
|
|
mean value: 0.8621902181925402
|
|
|
|
key: train_precision
|
|
value: [0.9527027 0.9261745 0.9893617 0.94 0.95172414 0.98591549
|
|
0.88679245 0.99242424 0.91558442 0.97841727]
|
|
|
|
mean value: 0.9519096909389335
|
|
|
|
key: test_recall
|
|
value: [0.9375 0.9375 0.5625 1. 0.9375 0.8125
|
|
0.93333333 0.53333333 1. 0.875 ]
|
|
|
|
mean value: 0.8529166666666667
|
|
|
|
key: train_recall
|
|
value: [0.99295775 0.97183099 0.65492958 0.99295775 0.97183099 0.98591549
|
|
0.98601399 0.91608392 0.99295775 0.95774648]
|
|
|
|
mean value: 0.9423224662661283
|
|
|
|
key: test_roc_auc
|
|
value: [0.875 0.875 0.75 0.90625 0.9375 0.84375
|
|
0.87291667 0.76666667 0.83333333 0.8375 ]
|
|
|
|
mean value: 0.8497916666666667
|
|
|
|
key: train_roc_auc
|
|
value: [0.97183099 0.9471831 0.82394366 0.96478873 0.96126761 0.98591549
|
|
0.92962671 0.95452083 0.95102433 0.96838373]
|
|
|
|
mean value: 0.9458485176795036
|
|
|
|
key: test_jcc
|
|
value: [0.78947368 0.78947368 0.52941176 0.84210526 0.88235294 0.72222222
|
|
0.77777778 0.53333333 0.76190476 0.73684211]
|
|
|
|
mean value: 0.7364897537962554
|
|
|
|
key: train_jcc
|
|
value: [0.94630872 0.90196078 0.65034965 0.93377483 0.9261745 0.97222222
|
|
0.8757764 0.90972222 0.90967742 0.93793103]
|
|
|
|
mean value: 0.8963897786374542
|
|
|
|
MCC on Blind test: 0.22
|
|
|
|
Accuracy on Blind test: 0.44
|
|
|
|
Model_name: Stochastic GDescent
|
|
Model func: SGDClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SGDClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01716423 0.01853156 0.01632261 0.01627469 0.01817465 0.01648641
|
|
0.01762009 0.0174396 0.01730609 0.01503563]
|
|
|
|
mean value: 0.017035555839538575
|
|
|
|
key: score_time
|
|
value: [0.01252532 0.01231885 0.01208925 0.01201677 0.01193023 0.01193023
|
|
0.01202798 0.01192021 0.01187968 0.01195002]
|
|
|
|
mean value: 0.012058854103088379
|
|
|
|
key: test_mcc
|
|
value: [0.75592895 0.62994079 0.67419986 0.67419986 0.75592895 0.67419986
|
|
0.82285074 0.9375 0.74689528 0.55573827]
|
|
|
|
mean value: 0.7227382560801155
|
|
|
|
key: train_mcc
|
|
value: [0.89939824 0.83774371 0.74290818 0.8145351 0.81662226 0.8535792
|
|
0.88699028 0.93127922 0.92393444 0.79590827]
|
|
|
|
mean value: 0.8502898903501691
|
|
|
|
key: test_accuracy
|
|
value: [0.875 0.8125 0.8125 0.8125 0.875 0.8125
|
|
0.90322581 0.96774194 0.87096774 0.77419355]
|
|
|
|
mean value: 0.8516129032258064
|
|
|
|
key: train_accuracy
|
|
value: [0.9471831 0.91549296 0.8556338 0.90140845 0.90140845 0.92253521
|
|
0.94035088 0.96491228 0.96140351 0.8877193 ]
|
|
|
|
mean value: 0.9198047936743267
|
|
|
|
key: test_fscore
|
|
value: [0.88235294 0.8 0.84210526 0.76923077 0.86666667 0.76923077
|
|
0.90909091 0.96774194 0.88235294 0.8 ]
|
|
|
|
mean value: 0.8488772195213821
|
|
|
|
key: train_fscore
|
|
value: [0.94983278 0.90977444 0.87384615 0.89230769 0.89147287 0.91666667
|
|
0.94389439 0.96598639 0.96219931 0.89873418]
|
|
|
|
mean value: 0.9204714866974258
|
|
|
|
key: test_precision
|
|
value: [0.83333333 0.85714286 0.72727273 1. 0.92857143 1.
|
|
0.83333333 0.9375 0.83333333 0.73684211]
|
|
|
|
mean value: 0.8687329118250171
|
|
|
|
key: train_precision
|
|
value: [0.9044586 0.97580645 0.77595628 0.98305085 0.99137931 0.99180328
|
|
0.89375 0.94039735 0.93959732 0.81609195]
|
|
|
|
mean value: 0.9212291391435611
|
|
|
|
key: test_recall
|
|
value: [0.9375 0.75 1. 0.625 0.8125 0.625 1. 1. 0.9375 0.875 ]
|
|
|
|
mean value: 0.85625
|
|
|
|
key: train_recall
|
|
value: [1. 0.85211268 1. 0.81690141 0.80985915 0.85211268
|
|
1. 0.99300699 0.98591549 1. ]
|
|
|
|
mean value: 0.9309908401457697
|
|
|
|
key: test_roc_auc
|
|
value: [0.875 0.8125 0.8125 0.8125 0.875 0.8125
|
|
0.90625 0.96875 0.86875 0.77083333]
|
|
|
|
mean value: 0.8514583333333333
|
|
|
|
key: train_roc_auc
|
|
value: [0.9471831 0.91549296 0.8556338 0.90140845 0.90140845 0.92253521
|
|
0.94014085 0.96481336 0.96148922 0.88811189]
|
|
|
|
mean value: 0.9198217275682065
|
|
|
|
key: test_jcc
|
|
value: [0.78947368 0.66666667 0.72727273 0.625 0.76470588 0.625
|
|
0.83333333 0.9375 0.78947368 0.66666667]
|
|
|
|
mean value: 0.7425092644713388
|
|
|
|
key: train_jcc
|
|
value: [0.9044586 0.83448276 0.77595628 0.80555556 0.8041958 0.84615385
|
|
0.89375 0.93421053 0.92715232 0.81609195]
|
|
|
|
mean value: 0.8542007645624589
|
|
|
|
MCC on Blind test: 0.2
|
|
|
|
Accuracy on Blind test: 0.48
|
|
|
|
Model_name: AdaBoost Classifier
|
|
Model func: AdaBoostClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', AdaBoostClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.14941049 0.13107944 0.13037682 0.13134885 0.13136578 0.13163543
|
|
0.1313796 0.13201165 0.13207245 0.13123918]
|
|
|
|
mean value: 0.13319196701049804
|
|
|
|
key: score_time
|
|
value: [0.01486826 0.01501536 0.01490831 0.01498747 0.01508451 0.01519775
|
|
0.01501441 0.01499581 0.01492596 0.01497769]
|
|
|
|
mean value: 0.014997553825378419
|
|
|
|
key: test_mcc
|
|
value: [0.81409158 0.81409158 0.93933644 0.93933644 0.8819171 1.
|
|
0.87083333 0.9372467 1. 0.87083333]
|
|
|
|
mean value: 0.906768649823811
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.90625 0.90625 0.96875 0.96875 0.9375 1.
|
|
0.93548387 0.96774194 1. 0.93548387]
|
|
|
|
mean value: 0.9526209677419355
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.90909091 0.90909091 0.96969697 0.96969697 0.94117647 1.
|
|
0.93333333 0.96551724 1. 0.9375 ]
|
|
|
|
mean value: 0.9535102802876636
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.88235294 0.88235294 0.94117647 0.94117647 0.88888889 1.
|
|
0.93333333 1. 1. 0.9375 ]
|
|
|
|
mean value: 0.9406781045751634
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.9375 0.9375 1. 1. 1. 1.
|
|
0.93333333 0.93333333 1. 0.9375 ]
|
|
|
|
mean value: 0.9679166666666666
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.90625 0.90625 0.96875 0.96875 0.9375 1.
|
|
0.93541667 0.96666667 1. 0.93541667]
|
|
|
|
mean value: 0.9525
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.83333333 0.83333333 0.94117647 0.94117647 0.88888889 1.
|
|
0.875 0.93333333 1. 0.88235294]
|
|
|
|
mean value: 0.912859477124183
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.06
|
|
|
|
Accuracy on Blind test: 0.21
|
|
|
|
Model_name: Bagging Classifier
|
|
Model func: BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
BaggingClassifier(n_jobs=10, oob_score=True,
|
|
random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.0401566 0.03634596 0.04769921 0.05428123 0.03971624 0.05849838
|
|
0.05273271 0.05906415 0.04415035 0.04030848]
|
|
|
|
mean value: 0.047295331954956055
|
|
|
|
key: score_time
|
|
value: [0.01692843 0.02299666 0.0224371 0.03471875 0.0253973 0.01851916
|
|
0.02662086 0.0234468 0.02398276 0.0300138 ]
|
|
|
|
mean value: 0.024506163597106934
|
|
|
|
key: test_mcc
|
|
value: [0.81409158 0.93933644 0.93933644 1. 0.93933644 0.93933644
|
|
0.80753845 1. 0.9375 0.80833333]
|
|
|
|
mean value: 0.9124809107704197
|
|
|
|
key: train_mcc
|
|
value: [1. 0.99298237 0.9860133 0.9860133 1. 0.98591549
|
|
1. 0.98596474 0.9791626 0.98596474]
|
|
|
|
mean value: 0.9902016540079617
|
|
|
|
key: test_accuracy
|
|
value: [0.90625 0.96875 0.96875 1. 0.96875 0.96875
|
|
0.90322581 1. 0.96774194 0.90322581]
|
|
|
|
mean value: 0.9555443548387097
|
|
|
|
key: train_accuracy
|
|
value: [1. 0.99647887 0.99295775 0.99295775 1. 0.99295775
|
|
1. 0.99298246 0.98947368 0.99298246]
|
|
|
|
mean value: 0.9950790709167284
|
|
|
|
key: test_fscore
|
|
value: [0.90909091 0.96969697 0.96969697 1. 0.96774194 0.96774194
|
|
0.89655172 1. 0.96774194 0.90322581]
|
|
|
|
mean value: 0.9551488185526006
|
|
|
|
key: train_fscore
|
|
value: [1. 0.99646643 0.9929078 0.9929078 1. 0.99295775
|
|
1. 0.99300699 0.98932384 0.99295775]
|
|
|
|
mean value: 0.9950528363313396
|
|
|
|
key: test_precision
|
|
value: [0.88235294 0.94117647 0.94117647 1. 1. 1.
|
|
0.92857143 1. 1. 0.93333333]
|
|
|
|
mean value: 0.9626610644257703
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 0.99295775
|
|
1. 0.99300699 1. 0.99295775]
|
|
|
|
mean value: 0.997892248596474
|
|
|
|
key: test_recall
|
|
value: [0.9375 1. 1. 1. 0.9375 0.9375
|
|
0.86666667 1. 0.9375 0.875 ]
|
|
|
|
mean value: 0.9491666666666667
|
|
|
|
key: train_recall
|
|
value: [1. 0.99295775 0.98591549 0.98591549 1. 0.99295775
|
|
1. 0.99300699 0.97887324 0.99295775]
|
|
|
|
mean value: 0.9922584457795726
|
|
|
|
key: test_roc_auc
|
|
value: [0.90625 0.96875 0.96875 1. 0.96875 0.96875
|
|
0.90208333 1. 0.96875 0.90416667]
|
|
|
|
mean value: 0.955625
|
|
|
|
key: train_roc_auc
|
|
value: [1. 0.99647887 0.99295775 0.99295775 1. 0.99295775
|
|
1. 0.99298237 0.98943662 0.99298237]
|
|
|
|
mean value: 0.9950753471880233
|
|
|
|
key: test_jcc
|
|
value: [0.83333333 0.94117647 0.94117647 1. 0.9375 0.9375
|
|
0.8125 1. 0.9375 0.82352941]
|
|
|
|
mean value: 0.9164215686274509
|
|
|
|
key: train_jcc
|
|
value: [1. 0.99295775 0.98591549 0.98591549 1. 0.98601399
|
|
1. 0.98611111 0.97887324 0.98601399]
|
|
|
|
mean value: 0.990180105497007
|
|
|
|
MCC on Blind test: 0.06
|
|
|
|
Accuracy on Blind test: 0.21
|
|
|
|
Model_name: Gaussian Process
|
|
Model func: GaussianProcessClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianProcessClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.06097364 0.08079934 0.08000755 0.07937479 0.07814407 0.07967234
|
|
0.09900784 0.10173464 0.09186673 0.07092118]
|
|
|
|
mean value: 0.08225021362304688
|
|
|
|
key: score_time
|
|
value: [0.01837087 0.02525473 0.02141833 0.02244473 0.02498221 0.02209568
|
|
0.02377081 0.02225614 0.02123618 0.0251863 ]
|
|
|
|
mean value: 0.022701597213745116
|
|
|
|
key: test_mcc
|
|
value: [0.68884672 0.38729833 0.77459667 0.37796447 0.38729833 0.62994079
|
|
0.61925228 0.48333333 0.35445878 0.61925228]
|
|
|
|
mean value: 0.5322241996805657
|
|
|
|
key: train_mcc
|
|
value: [0.97889751 0.97183099 0.97889751 0.9860133 0.99298237 0.98591549
|
|
0.98606255 0.9789707 0.98596474 0.98596474]
|
|
|
|
mean value: 0.9831499896159636
|
|
|
|
key: test_accuracy
|
|
value: [0.84375 0.6875 0.875 0.6875 0.6875 0.8125
|
|
0.80645161 0.74193548 0.67741935 0.80645161]
|
|
|
|
mean value: 0.7626008064516129
|
|
|
|
key: train_accuracy
|
|
value: [0.98943662 0.98591549 0.98943662 0.99295775 0.99647887 0.99295775
|
|
0.99298246 0.98947368 0.99298246 0.99298246]
|
|
|
|
mean value: 0.9915604151223129
|
|
|
|
key: test_fscore
|
|
value: [0.83870968 0.72222222 0.88888889 0.70588235 0.72222222 0.82352941
|
|
0.8125 0.73333333 0.70588235 0.8 ]
|
|
|
|
mean value: 0.7753170461733081
|
|
|
|
key: train_fscore
|
|
value: [0.98939929 0.98591549 0.98939929 0.9929078 0.99649123 0.99295775
|
|
0.99295775 0.98954704 0.99295775 0.99295775]
|
|
|
|
mean value: 0.9915491133261819
|
|
|
|
key: test_precision
|
|
value: [0.86666667 0.65 0.8 0.66666667 0.65 0.77777778
|
|
0.76470588 0.73333333 0.66666667 0.85714286]
|
|
|
|
mean value: 0.743295985060691
|
|
|
|
key: train_precision
|
|
value: [0.9929078 0.98591549 0.9929078 1. 0.99300699 0.99295775
|
|
1. 0.98611111 0.99295775 0.99295775]
|
|
|
|
mean value: 0.992972243934935
|
|
|
|
key: test_recall
|
|
value: [0.8125 0.8125 1. 0.75 0.8125 0.875
|
|
0.86666667 0.73333333 0.75 0.75 ]
|
|
|
|
mean value: 0.81625
|
|
|
|
key: train_recall
|
|
value: [0.98591549 0.98591549 0.98591549 0.98591549 1. 0.99295775
|
|
0.98601399 0.99300699 0.99295775 0.99295775]
|
|
|
|
mean value: 0.9901556190288585
|
|
|
|
key: test_roc_auc
|
|
value: [0.84375 0.6875 0.875 0.6875 0.6875 0.8125
|
|
0.80833333 0.74166667 0.675 0.80833333]
|
|
|
|
mean value: 0.7627083333333333
|
|
|
|
key: train_roc_auc
|
|
value: [0.98943662 0.98591549 0.98943662 0.99295775 0.99647887 0.99295775
|
|
0.99300699 0.98946124 0.99298237 0.99298237]
|
|
|
|
mean value: 0.9915616074066779
|
|
|
|
key: test_jcc
|
|
value: [0.72222222 0.56521739 0.8 0.54545455 0.56521739 0.7
|
|
0.68421053 0.57894737 0.54545455 0.66666667]
|
|
|
|
mean value: 0.6373390657143517
|
|
|
|
key: train_jcc
|
|
value: [0.97902098 0.97222222 0.97902098 0.98591549 0.99300699 0.98601399
|
|
0.98601399 0.97931034 0.98601399 0.98601399]
|
|
|
|
mean value: 0.983255295511245
|
|
|
|
MCC on Blind test: 0.17
|
|
|
|
Accuracy on Blind test: 0.53
|
|
|
|
Model_name: Gradient Boosting
|
|
Model func: GradientBoostingClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GradientBoostingClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.46722603 0.45420337 0.46067643 0.4648478 0.47499108 0.46509981
|
|
0.46426177 0.47671127 0.45206094 0.47163463]
|
|
|
|
mean value: 0.4651713132858276
|
|
|
|
key: score_time
|
|
value: [0.00948572 0.00930142 0.00971889 0.00974631 0.01031852 0.00943613
|
|
0.00939226 0.0101552 0.0094986 0.0096755 ]
|
|
|
|
mean value: 0.009672856330871582
|
|
|
|
key: test_mcc
|
|
value: [0.81409158 0.93933644 0.93933644 1. 1. 1.
|
|
0.87083333 1. 0.87770745 0.80833333]
|
|
|
|
mean value: 0.9249638569805321
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.90625 0.96875 0.96875 1. 1. 1.
|
|
0.93548387 1. 0.93548387 0.90322581]
|
|
|
|
mean value: 0.9617943548387097
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.90909091 0.96969697 0.96969697 1. 1. 1.
|
|
0.93333333 1. 0.94117647 0.90322581]
|
|
|
|
mean value: 0.962622045885803
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.88235294 0.94117647 0.94117647 1. 1. 1.
|
|
0.93333333 1. 0.88888889 0.93333333]
|
|
|
|
mean value: 0.9520261437908497
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.9375 1. 1. 1. 1. 1.
|
|
0.93333333 1. 1. 0.875 ]
|
|
|
|
mean value: 0.9745833333333334
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.90625 0.96875 0.96875 1. 1. 1.
|
|
0.93541667 1. 0.93333333 0.90416667]
|
|
|
|
mean value: 0.9616666666666667
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.83333333 0.94117647 0.94117647 1. 1. 1.
|
|
0.875 1. 0.88888889 0.82352941]
|
|
|
|
mean value: 0.9303104575163399
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.05
|
|
|
|
Accuracy on Blind test: 0.21
|
|
|
|
Model_name: QDA
|
|
Model func: QuadraticDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', QuadraticDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.02697706 0.02417254 0.03006434 0.02474928 0.02507758 0.02507782
|
|
0.02419448 0.02416945 0.02572727 0.03336573]
|
|
|
|
mean value: 0.026357555389404298
|
|
|
|
key: score_time
|
|
value: [0.01423049 0.01677632 0.01525044 0.01756954 0.01650715 0.01706696
|
|
0.01746702 0.01718903 0.017488 0.02302551]
|
|
|
|
mean value: 0.017257046699523926
|
|
|
|
key: test_mcc
|
|
value: [ 0.34752402 0.48038446 0.62554324 -0.13483997 0.07559289 0.32163376
|
|
-0.01581139 0.42352151 0.34258008 0.31407213]
|
|
|
|
mean value: 0.27802007453897526
|
|
|
|
key: train_mcc
|
|
value: [0.60447052 0.5990423 0.90582163 0.50659369 0.55022931 0.61533794
|
|
0.53458607 0.56718079 0.66601556 0.60091052]
|
|
|
|
mean value: 0.6150188328105437
|
|
|
|
key: test_accuracy
|
|
value: [0.65625 0.6875 0.78125 0.4375 0.53125 0.59375
|
|
0.48387097 0.67741935 0.64516129 0.64516129]
|
|
|
|
mean value: 0.6139112903225806
|
|
|
|
key: train_accuracy
|
|
value: [0.76760563 0.76408451 0.95070423 0.70422535 0.73239437 0.77464789
|
|
0.72280702 0.74385965 0.80701754 0.76491228]
|
|
|
|
mean value: 0.7732258463059056
|
|
|
|
key: test_fscore
|
|
value: [0.71794872 0.76190476 0.82051282 0.52631579 0.63414634 0.71111111
|
|
0.6 0.73684211 0.73170732 0.71794872]
|
|
|
|
mean value: 0.6958437682699556
|
|
|
|
key: train_fscore
|
|
value: [0.81142857 0.80911681 0.95302013 0.77173913 0.78888889 0.81609195
|
|
0.78356164 0.79665738 0.83775811 0.80911681]
|
|
|
|
mean value: 0.8177379434782648
|
|
|
|
key: test_precision
|
|
value: [0.60869565 0.61538462 0.69565217 0.45454545 0.52 0.55172414
|
|
0.48 0.60869565 0.6 0.60869565]
|
|
|
|
mean value: 0.5743393338295887
|
|
|
|
key: train_precision
|
|
value: [0.68269231 0.67942584 0.91025641 0.62831858 0.65137615 0.68932039
|
|
0.64414414 0.66203704 0.72081218 0.67942584]
|
|
|
|
mean value: 0.6947808875721466
|
|
|
|
key: test_recall
|
|
value: [0.875 1. 1. 0.625 0.8125 1.
|
|
0.8 0.93333333 0.9375 0.875 ]
|
|
|
|
mean value: 0.8858333333333334
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.65625 0.6875 0.78125 0.4375 0.53125 0.59375
|
|
0.49375 0.68541667 0.63541667 0.6375 ]
|
|
|
|
mean value: 0.6139583333333334
|
|
|
|
key: train_roc_auc
|
|
value: [0.76760563 0.76408451 0.95070423 0.70422535 0.73239437 0.77464789
|
|
0.72183099 0.74295775 0.80769231 0.76573427]
|
|
|
|
mean value: 0.7731877277651925
|
|
|
|
key: test_jcc
|
|
value: [0.56 0.61538462 0.69565217 0.35714286 0.46428571 0.55172414
|
|
0.42857143 0.58333333 0.57692308 0.56 ]
|
|
|
|
mean value: 0.5393017337485104
|
|
|
|
key: train_jcc
|
|
value: [0.68269231 0.67942584 0.91025641 0.62831858 0.65137615 0.68932039
|
|
0.64414414 0.66203704 0.72081218 0.67942584]
|
|
|
|
mean value: 0.6947808875721466
|
|
|
|
MCC on Blind test: 0.1
|
|
|
|
Accuracy on Blind test: 0.35
|
|
|
|
Model_name: Ridge Classifier
|
|
Model func: RidgeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02332354 0.04450059 0.02891874 0.03595114 0.03665876 0.03526258
|
|
0.03499365 0.03509426 0.03193188 0.03515124]
|
|
|
|
mean value: 0.034178638458251955
|
|
|
|
key: score_time
|
|
value: [0.02395391 0.03560877 0.0217123 0.0207777 0.02317023 0.02268767
|
|
0.02342653 0.02022815 0.02111912 0.02195239]
|
|
|
|
mean value: 0.023463678359985352
|
|
|
|
key: test_mcc
|
|
value: [0.81409158 0.75592895 0.93933644 0.82717019 0.875 0.81409158
|
|
0.9375 1. 0.80753845 0.80833333]
|
|
|
|
mean value: 0.8578990514118684
|
|
|
|
key: train_mcc
|
|
value: [0.95812669 0.94450549 0.94450549 0.93040839 0.95129413 0.93775982
|
|
0.95145657 0.92390856 0.93798423 0.93065917]
|
|
|
|
mean value: 0.9410608551595332
|
|
|
|
key: test_accuracy
|
|
value: [0.90625 0.875 0.96875 0.90625 0.9375 0.90625
|
|
0.96774194 1. 0.90322581 0.90322581]
|
|
|
|
mean value: 0.9274193548387096
|
|
|
|
key: train_accuracy
|
|
value: [0.97887324 0.97183099 0.97183099 0.96478873 0.97535211 0.96830986
|
|
0.9754386 0.96140351 0.96842105 0.96491228]
|
|
|
|
mean value: 0.9701161354089449
|
|
|
|
key: test_fscore
|
|
value: [0.90909091 0.88235294 0.96969697 0.91428571 0.9375 0.90322581
|
|
0.96774194 1. 0.90909091 0.90322581]
|
|
|
|
mean value: 0.9296210991728069
|
|
|
|
key: train_fscore
|
|
value: /home/tanu/git/LSHTM_analysis/scripts/ml/./katg_orig.py:175: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
rus_CT.sort_values(by = ['test_mcc'], ascending = False, inplace = True)
|
|
/home/tanu/git/LSHTM_analysis/scripts/ml/./katg_orig.py:178: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
rus_BT.sort_values(by = ['bts_mcc'], ascending = False, inplace = True)
|
|
[0.97916667 0.97241379 0.97241379 0.96551724 0.97577855 0.96907216
|
|
0.97594502 0.96245734 0.96907216 0.96551724]
|
|
|
|
mean value: 0.9707353967307983
|
|
|
|
key: test_precision
|
|
value: [0.88235294 0.83333333 0.94117647 0.84210526 0.9375 0.93333333
|
|
0.9375 1. 0.88235294 0.93333333]
|
|
|
|
mean value: 0.9122987616099071
|
|
|
|
key: train_precision
|
|
value: [0.96575342 0.9527027 0.9527027 0.94594595 0.95918367 0.94630872
|
|
0.95945946 0.94 0.94630872 0.94594595]
|
|
|
|
mean value: 0.9514311304548109
|
|
|
|
key: test_recall
|
|
value: [0.9375 0.9375 1. 1. 0.9375 0.875 1. 1. 0.9375 0.875 ]
|
|
|
|
mean value: 0.95
|
|
|
|
key: train_recall
|
|
value: [0.99295775 0.99295775 0.99295775 0.98591549 0.99295775 0.99295775
|
|
0.99300699 0.98601399 0.99295775 0.98591549]
|
|
|
|
mean value: 0.9908598443809712
|
|
|
|
key: test_roc_auc
|
|
value: [0.90625 0.875 0.96875 0.90625 0.9375 0.90625
|
|
0.96875 1. 0.90208333 0.90416667]
|
|
|
|
mean value: 0.9275
|
|
|
|
key: train_roc_auc
|
|
value: [0.97887324 0.97183099 0.97183099 0.96478873 0.97535211 0.96830986
|
|
0.97537674 0.96131685 0.96850685 0.96498572]
|
|
|
|
mean value: 0.970117206736925
|
|
|
|
key: test_jcc
|
|
value: [0.83333333 0.78947368 0.94117647 0.84210526 0.88235294 0.82352941
|
|
0.9375 1. 0.83333333 0.82352941]
|
|
|
|
mean value: 0.8706333849329205
|
|
|
|
key: train_jcc
|
|
value: [0.95918367 0.94630872 0.94630872 0.93333333 0.9527027 0.94
|
|
0.95302013 0.92763158 0.94 0.93333333]
|
|
|
|
mean value: 0.9431822205678743
|
|
|
|
MCC on Blind test: 0.21
|
|
|
|
Accuracy on Blind test: 0.51
|
|
|
|
Model_name: Ridge ClassifierCV
|
|
Model func: RidgeClassifierCV(cv=10)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifierCV(cv=10))])
|
|
|
|
key: fit_time
|
|
value: [0.26468396 0.23707485 0.23484063 0.23813033 0.29126167 0.26690769
|
|
0.24119735 0.23906517 0.23730087 0.24584031]
|
|
|
|
mean value: 0.24963028430938722
|
|
|
|
key: score_time
|
|
value: [0.02218151 0.0205338 0.02146173 0.02315283 0.02370119 0.01999259
|
|
0.0225513 0.02335763 0.02196383 0.02384853]
|
|
|
|
mean value: 0.022274494171142578
|
|
|
|
key: test_mcc
|
|
value: [0.81409158 0.75592895 0.93933644 0.82717019 0.875 0.81409158
|
|
0.9375 1. 0.80753845 0.80833333]
|
|
|
|
mean value: 0.8578990514118684
|
|
|
|
key: train_mcc
|
|
value: [0.95812669 0.94450549 0.94450549 0.93040839 0.95129413 0.93775982
|
|
0.95145657 0.92390856 0.9582759 0.93065917]
|
|
|
|
mean value: 0.9430900221063301
|
|
|
|
key: test_accuracy
|
|
value: [0.90625 0.875 0.96875 0.90625 0.9375 0.90625
|
|
0.96774194 1. 0.90322581 0.90322581]
|
|
|
|
mean value: 0.9274193548387096
|
|
|
|
key: train_accuracy
|
|
value: [0.97887324 0.97183099 0.97183099 0.96478873 0.97535211 0.96830986
|
|
0.9754386 0.96140351 0.97894737 0.96491228]
|
|
|
|
mean value: 0.9711687669878923
|
|
|
|
key: test_fscore
|
|
value: [0.90909091 0.88235294 0.96969697 0.91428571 0.9375 0.90322581
|
|
0.96774194 1. 0.90909091 0.90322581]
|
|
|
|
mean value: 0.9296210991728069
|
|
|
|
key: train_fscore
|
|
value: [0.97916667 0.97241379 0.97241379 0.96551724 0.97577855 0.96907216
|
|
0.97594502 0.96245734 0.97916667 0.96551724]
|
|
|
|
mean value: 0.9717448469026196
|
|
|
|
key: test_precision
|
|
value: [0.88235294 0.83333333 0.94117647 0.84210526 0.9375 0.93333333
|
|
0.9375 1. 0.88235294 0.93333333]
|
|
|
|
mean value: 0.9122987616099071
|
|
|
|
key: train_precision
|
|
value: [0.96575342 0.9527027 0.9527027 0.94594595 0.95918367 0.94630872
|
|
0.95945946 0.94 0.96575342 0.94594595]
|
|
|
|
mean value: 0.9533756004373428
|
|
|
|
key: test_recall
|
|
value: [0.9375 0.9375 1. 1. 0.9375 0.875 1. 1. 0.9375 0.875 ]
|
|
|
|
mean value: 0.95
|
|
|
|
key: train_recall
|
|
value: [0.99295775 0.99295775 0.99295775 0.98591549 0.99295775 0.99295775
|
|
0.99300699 0.98601399 0.99295775 0.98591549]
|
|
|
|
mean value: 0.9908598443809712
|
|
|
|
key: test_roc_auc
|
|
value: [0.90625 0.875 0.96875 0.90625 0.9375 0.90625
|
|
0.96875 1. 0.90208333 0.90416667]
|
|
|
|
mean value: 0.9275
|
|
|
|
key: train_roc_auc
|
|
value: [0.97887324 0.97183099 0.97183099 0.96478873 0.97535211 0.96830986
|
|
0.97537674 0.96131685 0.97899636 0.96498572]
|
|
|
|
mean value: 0.9711661577858761
|
|
|
|
key: test_jcc
|
|
value: [0.83333333 0.78947368 0.94117647 0.84210526 0.88235294 0.82352941
|
|
0.9375 1. 0.83333333 0.82352941]
|
|
|
|
mean value: 0.8706333849329205
|
|
|
|
key: train_jcc
|
|
value: [0.95918367 0.94630872 0.94630872 0.93333333 0.9527027 0.94
|
|
0.95302013 0.92763158 0.95918367 0.93333333]
|
|
|
|
mean value: 0.9451005879148131
|
|
|
|
MCC on Blind test: 0.21
|
|
|
|
Accuracy on Blind test: 0.51
|
|
|
|
Model_name: Logistic Regression
|
|
Model func: LogisticRegression(random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegression(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.03810716 0.03709626 0.0367434 0.04261827 0.03460145 0.05663276
|
|
0.05360341 0.04755831 0.0376575 0.03705287]
|
|
|
|
mean value: 0.042167139053344724
|
|
|
|
key: score_time
|
|
value: [0.01188159 0.01441073 0.01452422 0.01903272 0.01184082 0.01493359
|
|
0.01481843 0.01497293 0.01458144 0.01462245]
|
|
|
|
mean value: 0.014561891555786133
|
|
|
|
key: test_mcc
|
|
value: [0.81325006 0.83914639 0.83914639 0.74348441 0.77459667 0.87096774
|
|
0.69047575 0.84983659 0.73763441 0.6844511 ]
|
|
|
|
mean value: 0.7842989510343097
|
|
|
|
key: train_mcc
|
|
value: [0.8599849 0.85666952 0.842796 0.89965316 0.84251189 0.84624951
|
|
0.83935221 0.86718143 0.84417004 0.8393479 ]
|
|
|
|
mean value: 0.8537916563444095
|
|
|
|
key: test_accuracy
|
|
value: [0.90322581 0.91935484 0.91935484 0.87096774 0.88709677 0.93548387
|
|
0.83870968 0.91935484 0.86885246 0.83606557]
|
|
|
|
mean value: 0.8898466419883659
|
|
|
|
key: train_accuracy
|
|
value: [0.92985612 0.92805755 0.92086331 0.94964029 0.92086331 0.92266187
|
|
0.91906475 0.93345324 0.92100539 0.91921005]
|
|
|
|
mean value: 0.926467587151105
|
|
|
|
key: test_fscore
|
|
value: [0.90909091 0.92063492 0.92063492 0.86666667 0.88888889 0.93548387
|
|
0.85294118 0.92537313 0.86666667 0.85294118]
|
|
|
|
mean value: 0.8939322330820249
|
|
|
|
key: train_fscore
|
|
value: [0.93072824 0.92932862 0.92280702 0.95035461 0.92253521 0.92442882
|
|
0.92119089 0.93428064 0.92387543 0.92091388]
|
|
|
|
mean value: 0.9280443373841807
|
|
|
|
key: test_precision
|
|
value: [0.85714286 0.90625 0.90625 0.89655172 0.875 0.93548387
|
|
0.78378378 0.86111111 0.86666667 0.78378378]
|
|
|
|
mean value: 0.8672023797593875
|
|
|
|
key: train_precision
|
|
value: [0.91929825 0.91319444 0.90068493 0.93706294 0.90344828 0.90378007
|
|
0.89761092 0.92280702 0.89297659 0.90034364]
|
|
|
|
mean value: 0.909120707350487
|
|
|
|
key: test_recall
|
|
value: [0.96774194 0.93548387 0.93548387 0.83870968 0.90322581 0.93548387
|
|
0.93548387 1. 0.86666667 0.93548387]
|
|
|
|
mean value: 0.9253763440860214
|
|
|
|
key: train_recall
|
|
value: [0.94244604 0.94604317 0.94604317 0.96402878 0.94244604 0.94604317
|
|
0.94604317 0.94604317 0.95698925 0.94244604]
|
|
|
|
mean value: 0.9478571981124778
|
|
|
|
key: test_roc_auc
|
|
value: [0.90322581 0.91935484 0.91935484 0.87096774 0.88709677 0.93548387
|
|
0.83870968 0.91935484 0.8688172 0.8344086 ]
|
|
|
|
mean value: 0.8896774193548387
|
|
|
|
key: train_roc_auc
|
|
value: [0.92985612 0.92805755 0.92086331 0.94964029 0.92086331 0.92266187
|
|
0.91906475 0.93345324 0.92094067 0.9192517 ]
|
|
|
|
mean value: 0.9264652793893916
|
|
|
|
key: test_jcc
|
|
value: [0.83333333 0.85294118 0.85294118 0.76470588 0.8 0.87878788
|
|
0.74358974 0.86111111 0.76470588 0.74358974]
|
|
|
|
mean value: 0.809570592805887
|
|
|
|
key: train_jcc
|
|
value: [0.87043189 0.8679868 0.85667752 0.90540541 0.85620915 0.85947712
|
|
0.8538961 0.87666667 0.8585209 0.8534202 ]
|
|
|
|
mean value: 0.8658691763036805
|
|
|
|
MCC on Blind test: 0.23
|
|
|
|
Accuracy on Blind test: 0.53
|
|
|
|
Model_name: Logistic RegressionCV
|
|
Model func: LogisticRegressionCV(random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegressionCV(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.89820576 0.95263267 0.95077991 0.91781855 1.08768177 0.94778204
|
|
1.01672673 0.92168379 1.05719733 0.91310978]
|
|
|
|
mean value: 0.9663618326187133
|
|
|
|
key: score_time
|
|
value: [0.01472855 0.02327847 0.01604557 0.01537609 0.01533103 0.01232958
|
|
0.01567435 0.01526046 0.01890039 0.01533055]
|
|
|
|
mean value: 0.016225504875183105
|
|
|
|
key: test_mcc
|
|
value: [0.96824584 0.87096774 0.84983659 0.90748521 0.90369611 0.96824584
|
|
0.81325006 0.84266484 0.67314268 0.96770777]
|
|
|
|
mean value: 0.876524268542267
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 0.99640932 0.97482645 0.97482645 0.97487691
|
|
1. 1. 1. 0.99641572]
|
|
|
|
mean value: 0.9917354859485006
|
|
|
|
key: test_accuracy
|
|
value: [0.98387097 0.93548387 0.91935484 0.9516129 0.9516129 0.98387097
|
|
0.90322581 0.91935484 0.83606557 0.98360656]
|
|
|
|
mean value: 0.9368059227921735
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 0.99820144 0.98741007 0.98741007 0.98741007
|
|
1. 1. 1. 0.99820467]
|
|
|
|
mean value: 0.9958636322539813
|
|
|
|
key: test_fscore
|
|
value: [0.98360656 0.93548387 0.9122807 0.94915254 0.95238095 0.98360656
|
|
0.90909091 0.91525424 0.82758621 0.98412698]
|
|
|
|
mean value: 0.935256951963264
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 0.99820467 0.98743268 0.98743268 0.98747764
|
|
1. 1. 1. 0.9981982 ]
|
|
|
|
mean value: 0.9958745854791948
|
|
|
|
key: test_precision
|
|
value: [1. 0.93548387 1. 1. 0.9375 1.
|
|
0.85714286 0.96428571 0.85714286 0.96875 ]
|
|
|
|
mean value: 0.952030529953917
|
|
|
|
key: train_precision
|
|
value: [1. 1. 0.99641577 0.98566308 0.98566308 0.98220641
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9949948341177821
|
|
|
|
key: test_recall
|
|
value: [0.96774194 0.93548387 0.83870968 0.90322581 0.96774194 0.96774194
|
|
0.96774194 0.87096774 0.8 1. ]
|
|
|
|
mean value: 0.9219354838709678
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 0.98920863 0.98920863 0.99280576
|
|
1. 1. 1. 0.99640288]
|
|
|
|
mean value: 0.9967625899280576
|
|
|
|
key: test_roc_auc
|
|
value: [0.98387097 0.93548387 0.91935484 0.9516129 0.9516129 0.98387097
|
|
0.90322581 0.91935484 0.83548387 0.98333333]
|
|
|
|
mean value: 0.9367204301075269
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 0.99820144 0.98741007 0.98741007 0.98741007
|
|
1. 1. 1. 0.99820144]
|
|
|
|
mean value: 0.995863309352518
|
|
|
|
key: test_jcc
|
|
value: [0.96774194 0.87878788 0.83870968 0.90322581 0.90909091 0.96774194
|
|
0.83333333 0.84375 0.70588235 0.96875 ]
|
|
|
|
mean value: 0.8817013828992007
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 0.99641577 0.9751773 0.9751773 0.97526502
|
|
1. 1. 1. 0.99640288]
|
|
|
|
mean value: 0.9918438275904083
|
|
|
|
MCC on Blind test: 0.14
|
|
|
|
Accuracy on Blind test: 0.41
|
|
|
|
Model_name: Gaussian NB
|
|
Model func: GaussianNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianNB())])
|
|
|
|
key: fit_time
|
|
value: [0.02257085 0.0112493 0.01054573 0.01036143 0.01050878 0.01036024
|
|
0.01019192 0.01051331 0.01032925 0.01022053]
|
|
|
|
mean value: 0.01168513298034668
|
|
|
|
key: score_time
|
|
value: [0.01106262 0.00959563 0.00914097 0.00893879 0.00901771 0.00893164
|
|
0.00887775 0.00896096 0.00892639 0.00891471]
|
|
|
|
mean value: 0.009236717224121093
|
|
|
|
key: test_mcc
|
|
value: [0.58338335 0.5483871 0.74193548 0.51639778 0.54953196 0.54953196
|
|
0.51119863 0.64549722 0.54459739 0.60733867]
|
|
|
|
mean value: 0.5797799541660318
|
|
|
|
key: train_mcc
|
|
value: [0.58633473 0.58998913 0.5865169 0.57494473 0.62591548 0.62262853
|
|
0.57914044 0.60433219 0.60507789 0.59092789]
|
|
|
|
mean value: 0.5965807894919312
|
|
|
|
key: test_accuracy
|
|
value: [0.79032258 0.77419355 0.87096774 0.75806452 0.77419355 0.77419355
|
|
0.74193548 0.82258065 0.7704918 0.80327869]
|
|
|
|
mean value: 0.7880222104706505
|
|
|
|
key: train_accuracy
|
|
value: [0.79316547 0.79496403 0.79316547 0.78417266 0.81294964 0.81115108
|
|
0.78956835 0.80215827 0.80251346 0.79533214]
|
|
|
|
mean value: 0.7979140565465043
|
|
|
|
key: test_fscore
|
|
value: [0.8 0.77419355 0.87096774 0.75409836 0.76666667 0.76666667
|
|
0.77777778 0.82539683 0.75 0.8125 ]
|
|
|
|
mean value: 0.7898267587486255
|
|
|
|
key: train_fscore
|
|
value: [0.7935368 0.79642857 0.79573712 0.7993311 0.81227437 0.81415929
|
|
0.78918919 0.80286738 0.80427046 0.79787234]
|
|
|
|
mean value: 0.8005666638001188
|
|
|
|
key: test_precision
|
|
value: [0.76470588 0.77419355 0.87096774 0.76666667 0.79310345 0.79310345
|
|
0.68292683 0.8125 0.80769231 0.78787879]
|
|
|
|
mean value: 0.78537386607333
|
|
|
|
key: train_precision
|
|
value: [0.7921147 0.79078014 0.78596491 0.746875 0.81521739 0.80139373
|
|
0.79061372 0.8 0.79858657 0.78671329]
|
|
|
|
mean value: 0.7908259446555521
|
|
|
|
key: test_recall
|
|
value: [0.83870968 0.77419355 0.87096774 0.74193548 0.74193548 0.74193548
|
|
0.90322581 0.83870968 0.7 0.83870968]
|
|
|
|
mean value: 0.7990322580645162
|
|
|
|
key: train_recall
|
|
value: [0.79496403 0.80215827 0.8057554 0.85971223 0.80935252 0.82733813
|
|
0.78776978 0.8057554 0.81003584 0.80935252]
|
|
|
|
mean value: 0.8112194115675202
|
|
|
|
key: test_roc_auc
|
|
value: [0.79032258 0.77419355 0.87096774 0.75806452 0.77419355 0.77419355
|
|
0.74193548 0.82258065 0.76935484 0.80268817]
|
|
|
|
mean value: 0.7878494623655914
|
|
|
|
key: train_roc_auc
|
|
value: [0.79316547 0.79496403 0.79316547 0.78417266 0.81294964 0.81115108
|
|
0.78956835 0.80215827 0.80249994 0.79535726]
|
|
|
|
mean value: 0.7979152162141254
|
|
|
|
key: test_jcc
|
|
value: [0.66666667 0.63157895 0.77142857 0.60526316 0.62162162 0.62162162
|
|
0.63636364 0.7027027 0.6 0.68421053]
|
|
|
|
mean value: 0.6541457451983768
|
|
|
|
key: train_jcc
|
|
value: [0.6577381 0.66172107 0.66076696 0.66573816 0.68389058 0.68656716
|
|
0.65178571 0.67065868 0.67261905 0.66371681]
|
|
|
|
mean value: 0.6675202287084647
|
|
|
|
MCC on Blind test: 0.19
|
|
|
|
Accuracy on Blind test: 0.52
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01051378 0.01047087 0.01060367 0.01055789 0.01053548 0.01041293
|
|
0.01049829 0.01054573 0.01057553 0.01064825]
|
|
|
|
mean value: 0.01053624153137207
|
|
|
|
key: score_time
|
|
value: [0.00896335 0.00887966 0.00899315 0.00901151 0.00903106 0.00891781
|
|
0.00896287 0.00900006 0.00901461 0.00900006]
|
|
|
|
mean value: 0.008977413177490234
|
|
|
|
key: test_mcc
|
|
value: [0.61418277 0.64549722 0.7190925 0.61418277 0.67883359 0.64549722
|
|
0.56761348 0.64820372 0.61256703 0.60733867]
|
|
|
|
mean value: 0.6353008987490497
|
|
|
|
key: train_mcc
|
|
value: [0.67625899 0.66202471 0.66557529 0.69446479 0.683524 0.65510022
|
|
0.68741069 0.6870548 0.67744343 0.68448223]
|
|
|
|
mean value: 0.6773339149399755
|
|
|
|
key: test_accuracy
|
|
value: [0.80645161 0.82258065 0.85483871 0.80645161 0.83870968 0.82258065
|
|
0.77419355 0.82258065 0.80327869 0.80327869]
|
|
|
|
mean value: 0.8154944473823373
|
|
|
|
key: train_accuracy
|
|
value: [0.8381295 0.83093525 0.83273381 0.8471223 0.84172662 0.82733813
|
|
0.84352518 0.84352518 0.83842011 0.84201077]
|
|
|
|
mean value: 0.8385466850935769
|
|
|
|
key: test_fscore
|
|
value: [0.8125 0.82539683 0.84210526 0.8 0.83333333 0.81967213
|
|
0.8 0.83076923 0.8125 0.8125 ]
|
|
|
|
mean value: 0.8188776783804825
|
|
|
|
key: train_fscore
|
|
value: [0.8381295 0.83274021 0.8342246 0.84902309 0.84285714 0.83038869
|
|
0.8460177 0.8438061 0.84210526 0.84452297]
|
|
|
|
mean value: 0.8403815269479368
|
|
|
|
key: test_precision
|
|
value: [0.78787879 0.8125 0.92307692 0.82758621 0.86206897 0.83333333
|
|
0.71794872 0.79411765 0.76470588 0.78787879]
|
|
|
|
mean value: 0.8111095251942108
|
|
|
|
key: train_precision
|
|
value: [0.8381295 0.82394366 0.82685512 0.83859649 0.83687943 0.81597222
|
|
0.83275261 0.84229391 0.82474227 0.82986111]
|
|
|
|
mean value: 0.8310026327326828
|
|
|
|
key: test_recall
|
|
value: [0.83870968 0.83870968 0.77419355 0.77419355 0.80645161 0.80645161
|
|
0.90322581 0.87096774 0.86666667 0.83870968]
|
|
|
|
mean value: 0.8318279569892473
|
|
|
|
key: train_recall
|
|
value: [0.8381295 0.84172662 0.84172662 0.85971223 0.84892086 0.84532374
|
|
0.85971223 0.84532374 0.86021505 0.85971223]
|
|
|
|
mean value: 0.8500502823547613
|
|
|
|
key: test_roc_auc
|
|
value: [0.80645161 0.82258065 0.85483871 0.80645161 0.83870968 0.82258065
|
|
0.77419355 0.82258065 0.80430108 0.80268817]
|
|
|
|
mean value: 0.8155376344086022
|
|
|
|
key: train_roc_auc
|
|
value: [0.8381295 0.83093525 0.83273381 0.8471223 0.84172662 0.82733813
|
|
0.84352518 0.84352518 0.83838091 0.8420425 ]
|
|
|
|
mean value: 0.8385459374435935
|
|
|
|
key: test_jcc
|
|
value: [0.68421053 0.7027027 0.72727273 0.66666667 0.71428571 0.69444444
|
|
0.66666667 0.71052632 0.68421053 0.68421053]
|
|
|
|
mean value: 0.6935196816775764
|
|
|
|
key: train_jcc
|
|
value: [0.72136223 0.71341463 0.71559633 0.73765432 0.72839506 0.70996979
|
|
0.73312883 0.72981366 0.72727273 0.73088685]
|
|
|
|
mean value: 0.7247494441137159
|
|
|
|
MCC on Blind test: 0.19
|
|
|
|
Accuracy on Blind test: 0.52
|
|
|
|
Model_name: K-Nearest Neighbors
|
|
Model func: KNeighborsClassifier()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', KNeighborsClassifier())])
|
|
|
|
key: fit_time
|
|
value: [0.00978923 0.00980663 0.01080918 0.01040244 0.01147366 0.01142359
|
|
0.01133847 0.01122069 0.01150775 0.01156354]
|
|
|
|
mean value: 0.010933518409729004
|
|
|
|
key: score_time
|
|
value: [0.01632094 0.01658916 0.01412439 0.01506782 0.01708412 0.01405382
|
|
0.01692677 0.01375461 0.01690364 0.01371646]
|
|
|
|
mean value: 0.01545417308807373
|
|
|
|
key: test_mcc
|
|
value: [0.48488114 0.42289003 0.55301004 0.52297636 0.5809475 0.52297636
|
|
0.51639778 0.42023032 0.63939757 0.27849462]
|
|
|
|
mean value: 0.49422017278609187
|
|
|
|
key: train_mcc
|
|
value: [0.68522881 0.73158497 0.71447096 0.73033396 0.68805267 0.69663288
|
|
0.72550886 0.71313508 0.69535672 0.67478102]
|
|
|
|
mean value: 0.705508594382046
|
|
|
|
key: test_accuracy
|
|
value: [0.74193548 0.70967742 0.77419355 0.75806452 0.79032258 0.75806452
|
|
0.75806452 0.70967742 0.81967213 0.63934426]
|
|
|
|
mean value: 0.7459016393442623
|
|
|
|
key: train_accuracy
|
|
value: [0.84172662 0.86510791 0.85611511 0.86510791 0.84352518 0.8471223
|
|
0.86151079 0.85611511 0.84739677 0.83662478]
|
|
|
|
mean value: 0.8520352479237435
|
|
|
|
key: test_fscore
|
|
value: [0.75 0.72727273 0.75862069 0.73684211 0.78688525 0.73684211
|
|
0.75409836 0.7 0.81355932 0.64516129]
|
|
|
|
mean value: 0.7409281846368071
|
|
|
|
key: train_fscore
|
|
value: [0.8358209 0.86085343 0.85018727 0.86388385 0.83918669 0.84052533
|
|
0.85553471 0.85239852 0.84460695 0.83054004]
|
|
|
|
mean value: 0.8473537678320475
|
|
|
|
key: test_precision
|
|
value: [0.72727273 0.68571429 0.81481481 0.80769231 0.8 0.80769231
|
|
0.76666667 0.72413793 0.82758621 0.64516129]
|
|
|
|
mean value: 0.7606738538106725
|
|
|
|
key: train_precision
|
|
value: [0.86821705 0.88888889 0.88671875 0.87179487 0.86311787 0.87843137
|
|
0.89411765 0.875 0.8619403 0.86100386]
|
|
|
|
mean value: 0.8749230614788926
|
|
|
|
key: test_recall
|
|
value: [0.77419355 0.77419355 0.70967742 0.67741935 0.77419355 0.67741935
|
|
0.74193548 0.67741935 0.8 0.64516129]
|
|
|
|
mean value: 0.7251612903225806
|
|
|
|
key: train_recall
|
|
value: [0.8057554 0.83453237 0.81654676 0.85611511 0.81654676 0.8057554
|
|
0.82014388 0.83093525 0.82795699 0.80215827]
|
|
|
|
mean value: 0.8216446197880406
|
|
|
|
key: test_roc_auc
|
|
value: [0.74193548 0.70967742 0.77419355 0.75806452 0.79032258 0.75806452
|
|
0.75806452 0.70967742 0.81935484 0.63924731]
|
|
|
|
mean value: 0.7458602150537634
|
|
|
|
key: train_roc_auc
|
|
value: [0.84172662 0.86510791 0.85611511 0.86510791 0.84352518 0.8471223
|
|
0.86151079 0.85611511 0.84743173 0.83656301]
|
|
|
|
mean value: 0.8520325674943916
|
|
|
|
key: test_jcc
|
|
value: [0.6 0.57142857 0.61111111 0.58333333 0.64864865 0.58333333
|
|
0.60526316 0.53846154 0.68571429 0.47619048]
|
|
|
|
mean value: 0.5903484456116035
|
|
|
|
key: train_jcc
|
|
value: [0.71794872 0.75570033 0.73941368 0.76038339 0.72292994 0.72491909
|
|
0.74754098 0.74276527 0.73101266 0.71019108]
|
|
|
|
mean value: 0.7352805139150561
|
|
|
|
MCC on Blind test: 0.17
|
|
|
|
Accuracy on Blind test: 0.55
|
|
|
|
Model_name: SVM
|
|
Model func: SVC(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SVC(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02437425 0.02450132 0.02496505 0.02432227 0.02489543 0.02445531
|
|
0.0243113 0.02456117 0.02471304 0.02392483]
|
|
|
|
mean value: 0.02450239658355713
|
|
|
|
key: score_time
|
|
value: [0.01235843 0.01249051 0.01268172 0.01241851 0.01266956 0.0124321
|
|
0.01242375 0.01243353 0.01217508 0.01202798]
|
|
|
|
mean value: 0.012411117553710938
|
|
|
|
key: test_mcc
|
|
value: [0.68313005 0.77784447 0.83914639 0.71004695 0.77459667 0.77459667
|
|
0.64751827 0.78446454 0.67384323 0.65552656]
|
|
|
|
mean value: 0.73207137931005
|
|
|
|
key: train_mcc
|
|
value: [0.79209132 0.78094965 0.78547437 0.84774592 0.77493517 0.79541168
|
|
0.80491779 0.77493517 0.7996419 0.80193561]
|
|
|
|
mean value: 0.7958038569876071
|
|
|
|
key: test_accuracy
|
|
value: [0.83870968 0.88709677 0.91935484 0.85483871 0.88709677 0.88709677
|
|
0.80645161 0.88709677 0.83606557 0.81967213]
|
|
|
|
mean value: 0.8623479640401903
|
|
|
|
key: train_accuracy
|
|
value: [0.89388489 0.88848921 0.89028777 0.92266187 0.88489209 0.89568345
|
|
0.90107914 0.88489209 0.89766607 0.8994614 ]
|
|
|
|
mean value: 0.895899797217881
|
|
|
|
key: test_fscore
|
|
value: [0.84848485 0.89230769 0.92063492 0.85245902 0.88888889 0.88888889
|
|
0.83333333 0.89552239 0.83870968 0.84057971]
|
|
|
|
mean value: 0.8699809364555999
|
|
|
|
key: train_fscore
|
|
value: [0.8991453 0.89383562 0.89608177 0.9254766 0.89115646 0.90068493
|
|
0.90500864 0.89115646 0.90289608 0.90344828]
|
|
|
|
mean value: 0.9008890140313144
|
|
|
|
key: test_precision
|
|
value: [0.8 0.85294118 0.90625 0.86666667 0.875 0.875
|
|
0.73170732 0.83333333 0.8125 0.76315789]
|
|
|
|
mean value: 0.8316556388280602
|
|
|
|
key: train_precision
|
|
value: [0.85667752 0.85294118 0.85113269 0.89297659 0.84516129 0.85947712
|
|
0.87043189 0.84516129 0.86038961 0.86754967]
|
|
|
|
mean value: 0.8601898853393118
|
|
|
|
key: test_recall
|
|
value: [0.90322581 0.93548387 0.93548387 0.83870968 0.90322581 0.90322581
|
|
0.96774194 0.96774194 0.86666667 0.93548387]
|
|
|
|
mean value: 0.9156989247311828
|
|
|
|
key: train_recall
|
|
value: [0.94604317 0.93884892 0.94604317 0.96043165 0.94244604 0.94604317
|
|
0.94244604 0.94244604 0.94982079 0.94244604]
|
|
|
|
mean value: 0.9457015033134782
|
|
|
|
key: test_roc_auc
|
|
value: [0.83870968 0.88709677 0.91935484 0.85483871 0.88709677 0.88709677
|
|
0.80645161 0.88709677 0.83655914 0.81774194]
|
|
|
|
mean value: 0.8622043010752688
|
|
|
|
key: train_roc_auc
|
|
value: [0.89388489 0.88848921 0.89028777 0.92266187 0.88489209 0.89568345
|
|
0.90107914 0.88489209 0.89757226 0.89953843]
|
|
|
|
mean value: 0.8958981202135066
|
|
|
|
key: test_jcc
|
|
value: [0.73684211 0.80555556 0.85294118 0.74285714 0.8 0.8
|
|
0.71428571 0.81081081 0.72222222 0.725 ]
|
|
|
|
mean value: 0.7710514727465192
|
|
|
|
key: train_jcc
|
|
value: [0.81677019 0.80804954 0.8117284 0.86129032 0.80368098 0.81931464
|
|
0.82649842 0.80368098 0.82298137 0.82389937]
|
|
|
|
mean value: 0.8197894204757968
|
|
|
|
MCC on Blind test: 0.22
|
|
|
|
Accuracy on Blind test: 0.49
|
|
|
|
Model_name: MLP
|
|
Model func: MLPClassifier(max_iter=500, random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MLPClassifier(max_iter=500, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.93708968 2.20079994 2.05276179 2.0693748 2.06285834 2.05502129
|
|
2.20690989 2.06774426 2.16239572 2.21165562]
|
|
|
|
mean value: 2.1026611328125
|
|
|
|
key: score_time
|
|
value: [0.0157094 0.02155018 0.02022552 0.01245332 0.01499414 0.02136111
|
|
0.01244831 0.01244354 0.02483344 0.02263308]
|
|
|
|
mean value: 0.017865204811096193
|
|
|
|
key: test_mcc
|
|
value: [0.87278605 0.87278605 0.84266484 0.82199494 0.87096774 0.87278605
|
|
0.74348441 0.90369611 0.71525965 0.96770777]
|
|
|
|
mean value: 0.8484133607436208
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 0.99280576 1. 1.
|
|
0.99640932 0.99640932 0.99641572 1. ]
|
|
|
|
mean value: 0.9982040128141526
|
|
|
|
key: test_accuracy
|
|
value: [0.93548387 0.93548387 0.91935484 0.90322581 0.93548387 0.93548387
|
|
0.87096774 0.9516129 0.85245902 0.98360656]
|
|
|
|
mean value: 0.922316234796404
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 0.99640288 1. 1.
|
|
0.99820144 0.99820144 0.99820467 1. ]
|
|
|
|
mean value: 0.9991010423259238
|
|
|
|
key: test_fscore
|
|
value: [0.9375 0.9375 0.91525424 0.89285714 0.93548387 0.93333333
|
|
0.875 0.95081967 0.86153846 0.98412698]
|
|
|
|
mean value: 0.9223413702242946
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 0.99640288 1. 1.
|
|
0.99820467 0.9981982 0.99821109 1. ]
|
|
|
|
mean value: 0.9991016834993942
|
|
|
|
key: test_precision
|
|
value: [0.90909091 0.90909091 0.96428571 1. 0.93548387 0.96551724
|
|
0.84848485 0.96666667 0.8 0.96875 ]
|
|
|
|
mean value: 0.92673701599661
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 0.99640288 1. 1.
|
|
0.99641577 1. 0.99642857 1. ]
|
|
|
|
mean value: 0.9989247219735732
|
|
|
|
key: test_recall
|
|
value: [0.96774194 0.96774194 0.87096774 0.80645161 0.93548387 0.90322581
|
|
0.90322581 0.93548387 0.93333333 1. ]
|
|
|
|
mean value: 0.9223655913978495
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 0.99640288 1. 1.
|
|
1. 0.99640288 1. 1. ]
|
|
|
|
mean value: 0.9992805755395684
|
|
|
|
key: test_roc_auc
|
|
value: [0.93548387 0.93548387 0.91935484 0.90322581 0.93548387 0.93548387
|
|
0.87096774 0.9516129 0.85376344 0.98333333]
|
|
|
|
mean value: 0.9224193548387097
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 0.99640288 1. 1.
|
|
0.99820144 0.99820144 0.99820144 1. ]
|
|
|
|
mean value: 0.9991007194244604
|
|
|
|
key: test_jcc
|
|
value: [0.88235294 0.88235294 0.84375 0.80645161 0.87878788 0.875
|
|
0.77777778 0.90625 0.75675676 0.96875 ]
|
|
|
|
mean value: 0.8578229908578581
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 0.99283154 1. 1.
|
|
0.99641577 0.99640288 0.99642857 1. ]
|
|
|
|
mean value: 0.998207876095437
|
|
|
|
MCC on Blind test: 0.17
|
|
|
|
Accuracy on Blind test: 0.51
|
|
|
|
Model_name: Decision Tree
|
|
Model func: DecisionTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', DecisionTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.03720474 0.02276659 0.02222443 0.02198648 0.02197099 0.02208471
|
|
0.02081251 0.02087331 0.02242088 0.01969099]
|
|
|
|
mean value: 0.023203563690185548
|
|
|
|
key: score_time
|
|
value: [0.00942612 0.0091238 0.00888109 0.00883532 0.00888824 0.00891638
|
|
0.0089848 0.00916314 0.00912094 0.00890946]
|
|
|
|
mean value: 0.009024930000305176
|
|
|
|
key: test_mcc
|
|
value: [0.96824584 0.90369611 0.96824584 0.93743687 0.90748521 1.
|
|
0.87278605 0.84266484 0.87082935 0.9344086 ]
|
|
|
|
mean value: 0.9205798710435014
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.98387097 0.9516129 0.98387097 0.96774194 0.9516129 1.
|
|
0.93548387 0.91935484 0.93442623 0.96721311]
|
|
|
|
mean value: 0.9595187731359069
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.98360656 0.95081967 0.98412698 0.96666667 0.94915254 1.
|
|
0.9375 0.91525424 0.93548387 0.96774194]
|
|
|
|
mean value: 0.9590352466414477
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 0.96666667 0.96875 1. 1. 1.
|
|
0.90909091 0.96428571 0.90625 0.96774194]
|
|
|
|
mean value: 0.9682785225527161
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.96774194 0.93548387 1. 0.93548387 0.90322581 1.
|
|
0.96774194 0.87096774 0.96666667 0.96774194]
|
|
|
|
mean value: 0.951505376344086
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.98387097 0.9516129 0.98387097 0.96774194 0.9516129 1.
|
|
0.93548387 0.91935484 0.93494624 0.9672043 ]
|
|
|
|
mean value: 0.9595698924731183
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.96774194 0.90625 0.96875 0.93548387 0.90322581 1.
|
|
0.88235294 0.84375 0.87878788 0.9375 ]
|
|
|
|
mean value: 0.9223842432867575
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.01
|
|
|
|
Accuracy on Blind test: 0.2
|
|
|
|
Model_name: Extra Trees
|
|
Model func: ExtraTreesClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreesClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.1242733 0.12514329 0.12573195 0.12572408 0.12674093 0.12578678
|
|
0.12831879 0.12836862 0.12577057 0.12617683]
|
|
|
|
mean value: 0.12620351314544678
|
|
|
|
key: score_time
|
|
value: [0.01789355 0.01834917 0.01798558 0.01879549 0.0181818 0.01906204
|
|
0.01819825 0.01794028 0.01799631 0.01796556]
|
|
|
|
mean value: 0.018236804008483886
|
|
|
|
key: test_mcc
|
|
value: [0.87278605 0.87278605 0.90369611 0.93743687 0.93743687 0.83914639
|
|
0.87831007 0.90748521 0.8403496 0.8688172 ]
|
|
|
|
mean value: 0.8858250418390108
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.93548387 0.93548387 0.9516129 0.96774194 0.96774194 0.91935484
|
|
0.93548387 0.9516129 0.91803279 0.93442623]
|
|
|
|
mean value: 0.9416975145425701
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.9375 0.9375 0.95081967 0.96666667 0.96666667 0.92063492
|
|
0.93939394 0.95384615 0.92063492 0.93548387]
|
|
|
|
mean value: 0.9429146810942157
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.90909091 0.90909091 0.96666667 1. 1. 0.90625
|
|
0.88571429 0.91176471 0.87878788 0.93548387]
|
|
|
|
mean value: 0.9302849226200745
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.96774194 0.96774194 0.93548387 0.93548387 0.93548387 0.93548387
|
|
1. 1. 0.96666667 0.93548387]
|
|
|
|
mean value: 0.9579569892473118
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.93548387 0.93548387 0.9516129 0.96774194 0.96774194 0.91935484
|
|
0.93548387 0.9516129 0.9188172 0.9344086 ]
|
|
|
|
mean value: 0.9417741935483872
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.88235294 0.88235294 0.90625 0.93548387 0.93548387 0.85294118
|
|
0.88571429 0.91176471 0.85294118 0.87878788]
|
|
|
|
mean value: 0.8924072847614118
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.16
|
|
|
|
Accuracy on Blind test: 0.36
|
|
|
|
Model_name: Extra Tree
|
|
Model func: ExtraTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01059222 0.01075006 0.01117396 0.01065135 0.01077533 0.01067138
|
|
0.01046896 0.01045394 0.01072764 0.01058769]
|
|
|
|
mean value: 0.010685253143310546
|
|
|
|
key: score_time
|
|
value: [0.0090189 0.00899482 0.00891805 0.00895977 0.00898051 0.00880671
|
|
0.00890422 0.00889993 0.00933266 0.00889468]
|
|
|
|
mean value: 0.008971023559570312
|
|
|
|
key: test_mcc
|
|
value: [0.61807005 0.68313005 0.71004695 0.61807005 0.84266484 0.45760432
|
|
0.54953196 0.64549722 0.60645161 0.80322581]
|
|
|
|
mean value: 0.6534292846193077
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.80645161 0.83870968 0.85483871 0.80645161 0.91935484 0.72580645
|
|
0.77419355 0.82258065 0.80327869 0.90163934]
|
|
|
|
mean value: 0.8253305129561079
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.79310345 0.82758621 0.85714286 0.79310345 0.91525424 0.70175439
|
|
0.76666667 0.81967213 0.8 0.90322581]
|
|
|
|
mean value: 0.8177509188110001
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.85185185 0.88888889 0.84375 0.85185185 0.96428571 0.76923077
|
|
0.79310345 0.83333333 0.8 0.90322581]
|
|
|
|
mean value: 0.8499521664169885
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.74193548 0.77419355 0.87096774 0.74193548 0.87096774 0.64516129
|
|
0.74193548 0.80645161 0.8 0.90322581]
|
|
|
|
mean value: 0.7896774193548387
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.80645161 0.83870968 0.85483871 0.80645161 0.91935484 0.72580645
|
|
0.77419355 0.82258065 0.80322581 0.9016129 ]
|
|
|
|
mean value: 0.8253225806451613
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.65714286 0.70588235 0.75 0.65714286 0.84375 0.54054054
|
|
0.62162162 0.69444444 0.66666667 0.82352941]
|
|
|
|
mean value: 0.696072075226487
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.1
|
|
|
|
Accuracy on Blind test: 0.43
|
|
|
|
Model_name: Random Forest
|
|
Model func: RandomForestClassifier(n_estimators=1000, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(n_estimators=1000, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.83548903 1.8379004 1.86891341 1.86416602 1.86113143 1.84892035
|
|
1.83393502 1.84949994 1.83295679 1.83171296]
|
|
|
|
mean value: 1.8464625358581543
|
|
|
|
key: score_time
|
|
value: [0.0921917 0.09853959 0.09397936 0.09889221 0.09865165 0.09259009
|
|
0.09273338 0.09274316 0.09208941 0.09240556]
|
|
|
|
mean value: 0.09448161125183105
|
|
|
|
key: test_mcc
|
|
value: [0.90369611 0.96824584 0.96824584 0.96824584 0.96824584 1.
|
|
0.87831007 0.96824584 0.87082935 0.93635873]
|
|
|
|
mean value: 0.9430423448212051
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.9516129 0.98387097 0.98387097 0.98387097 0.98387097 1.
|
|
0.93548387 0.98387097 0.93442623 0.96721311]
|
|
|
|
mean value: 0.970809095716552
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.95238095 0.98412698 0.98360656 0.98360656 0.98360656 1.
|
|
0.93939394 0.98412698 0.93548387 0.96875 ]
|
|
|
|
mean value: 0.9715082403127749
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.9375 0.96875 1. 1. 1. 1.
|
|
0.88571429 0.96875 0.90625 0.93939394]
|
|
|
|
mean value: 0.9606358225108225
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.96774194 1. 0.96774194 0.96774194 0.96774194 1.
|
|
1. 1. 0.96666667 1. ]
|
|
|
|
mean value: 0.983763440860215
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.9516129 0.98387097 0.98387097 0.98387097 0.98387097 1.
|
|
0.93548387 0.98387097 0.93494624 0.96666667]
|
|
|
|
mean value: 0.9708064516129032
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.90909091 0.96875 0.96774194 0.96774194 0.96774194 1.
|
|
0.88571429 0.96875 0.87878788 0.93939394]
|
|
|
|
mean value: 0.9453712819438626
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.07
|
|
|
|
Accuracy on Blind test: 0.21
|
|
|
|
Model_name: Random Forest2
|
|
Model func: RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000...05', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10,
|
|
oob_score=True, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.00086403 1.01194215 1.00798345 1.02354908 0.99338937 1.05827212
|
|
0.98856235 0.96865797 1.00004292 1.03345537]
|
|
|
|
mean value: 1.0086718797683716
|
|
|
|
key: score_time
|
|
value: [0.2509551 0.21219444 0.20999908 0.22287941 0.26200318 0.24685287
|
|
0.26816392 0.22774673 0.2380302 0.17141843]
|
|
|
|
mean value: 0.23102433681488038
|
|
|
|
key: test_mcc
|
|
value: [0.84266484 0.90369611 0.96824584 0.96824584 0.96824584 0.96824584
|
|
0.82199494 0.96824584 0.90215054 0.87055472]
|
|
|
|
mean value: 0.9182290332262679
|
|
|
|
key: train_mcc
|
|
value: [0.98563702 0.97844259 0.98207157 0.98207157 0.97844259 0.98207157
|
|
0.97487691 0.98563702 0.98566253 0.97848145]
|
|
|
|
mean value: 0.9813394821071346
|
|
|
|
key: test_accuracy
|
|
value: [0.91935484 0.9516129 0.98387097 0.98387097 0.98387097 0.98387097
|
|
0.90322581 0.98387097 0.95081967 0.93442623]
|
|
|
|
mean value: 0.9578794288736119
|
|
|
|
key: train_accuracy
|
|
value: [0.99280576 0.98920863 0.99100719 0.99100719 0.98920863 0.99100719
|
|
0.98741007 0.99280576 0.99281867 0.98922801]
|
|
|
|
mean value: 0.9906507110290224
|
|
|
|
key: test_fscore
|
|
value: [0.92307692 0.95238095 0.98360656 0.98360656 0.98360656 0.98360656
|
|
0.91176471 0.98412698 0.95081967 0.9375 ]
|
|
|
|
mean value: 0.9594095467106557
|
|
|
|
key: train_fscore
|
|
value: [0.99283154 0.98924731 0.99105546 0.99105546 0.98924731 0.99105546
|
|
0.98747764 0.99283154 0.99285714 0.98924731]
|
|
|
|
mean value: 0.9906906167933925
|
|
|
|
key: test_precision
|
|
value: [0.88235294 0.9375 1. 1. 1. 1.
|
|
0.83783784 0.96875 0.93548387 0.90909091]
|
|
|
|
mean value: 0.9471015559072959
|
|
|
|
key: train_precision
|
|
value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
[0.98928571 0.98571429 0.98576512 0.98576512 0.98571429 0.98576512
|
|
0.98220641 0.98928571 0.98932384 0.98571429]
|
|
|
|
mean value: 0.9864539908490086
|
|
|
|
key: test_recall
|
|
value: [0.96774194 0.96774194 0.96774194 0.96774194 0.96774194 0.96774194
|
|
1. 1. 0.96666667 0.96774194]
|
|
|
|
mean value: 0.9740860215053764
|
|
|
|
key: train_recall
|
|
value: [0.99640288 0.99280576 0.99640288 0.99640288 0.99280576 0.99640288
|
|
0.99280576 0.99640288 0.99641577 0.99280576]
|
|
|
|
mean value: 0.9949653180681262
|
|
|
|
key: test_roc_auc
|
|
value: [0.91935484 0.9516129 0.98387097 0.98387097 0.98387097 0.98387097
|
|
0.90322581 0.98387097 0.95107527 0.93387097]
|
|
|
|
mean value: 0.9578494623655914
|
|
|
|
key: train_roc_auc
|
|
value: [0.99280576 0.98920863 0.99100719 0.99100719 0.98920863 0.99100719
|
|
0.98741007 0.99280576 0.9928122 0.98923442]
|
|
|
|
mean value: 0.9906507052422577
|
|
|
|
key: test_jcc
|
|
value: [0.85714286 0.90909091 0.96774194 0.96774194 0.96774194 0.96774194
|
|
0.83783784 0.96875 0.90625 0.88235294]
|
|
|
|
mean value: 0.9232392287183558
|
|
|
|
key: train_jcc
|
|
value: [0.98576512 0.9787234 0.9822695 0.9822695 0.9787234 0.9822695
|
|
0.97526502 0.98576512 0.9858156 0.9787234 ]
|
|
|
|
mean value: 0.9815589593019299
|
|
|
|
MCC on Blind test: 0.1
|
|
|
|
Accuracy on Blind test: 0.25
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.02520299 0.01059842 0.01069093 0.0106535 0.01082826 0.01074505
|
|
0.0108285 0.01074529 0.01063871 0.01066732]
|
|
|
|
mean value: 0.012159895896911622
|
|
|
|
key: score_time
|
|
value: [0.01116538 0.00920391 0.00933433 0.00932336 0.0090549 0.00906706
|
|
0.00910687 0.00905514 0.00927353 0.00904298]
|
|
|
|
mean value: 0.009362745285034179
|
|
|
|
key: test_mcc
|
|
value: [0.61418277 0.64549722 0.7190925 0.61418277 0.67883359 0.64549722
|
|
0.56761348 0.64820372 0.61256703 0.60733867]
|
|
|
|
mean value: 0.6353008987490497
|
|
|
|
key: train_mcc
|
|
value: [0.67625899 0.66202471 0.66557529 0.69446479 0.683524 0.65510022
|
|
0.68741069 0.6870548 0.67744343 0.68448223]
|
|
|
|
mean value: 0.6773339149399755
|
|
|
|
key: test_accuracy
|
|
value: [0.80645161 0.82258065 0.85483871 0.80645161 0.83870968 0.82258065
|
|
0.77419355 0.82258065 0.80327869 0.80327869]
|
|
|
|
mean value: 0.8154944473823373
|
|
|
|
key: train_accuracy
|
|
value: [0.8381295 0.83093525 0.83273381 0.8471223 0.84172662 0.82733813
|
|
0.84352518 0.84352518 0.83842011 0.84201077]
|
|
|
|
mean value: 0.8385466850935769
|
|
|
|
key: test_fscore
|
|
value: [0.8125 0.82539683 0.84210526 0.8 0.83333333 0.81967213
|
|
0.8 0.83076923 0.8125 0.8125 ]
|
|
|
|
mean value: 0.8188776783804825
|
|
|
|
key: train_fscore
|
|
value: [0.8381295 0.83274021 0.8342246 0.84902309 0.84285714 0.83038869
|
|
0.8460177 0.8438061 0.84210526 0.84452297]
|
|
|
|
mean value: 0.8403815269479368
|
|
|
|
key: test_precision
|
|
value: [0.78787879 0.8125 0.92307692 0.82758621 0.86206897 0.83333333
|
|
0.71794872 0.79411765 0.76470588 0.78787879]
|
|
|
|
mean value: 0.8111095251942108
|
|
|
|
key: train_precision
|
|
value: [0.8381295 0.82394366 0.82685512 0.83859649 0.83687943 0.81597222
|
|
0.83275261 0.84229391 0.82474227 0.82986111]
|
|
|
|
mean value: 0.8310026327326828
|
|
|
|
key: test_recall
|
|
value: [0.83870968 0.83870968 0.77419355 0.77419355 0.80645161 0.80645161
|
|
0.90322581 0.87096774 0.86666667 0.83870968]
|
|
|
|
mean value: 0.8318279569892473
|
|
|
|
key: train_recall
|
|
value: [0.8381295 0.84172662 0.84172662 0.85971223 0.84892086 0.84532374
|
|
0.85971223 0.84532374 0.86021505 0.85971223]
|
|
|
|
mean value: 0.8500502823547613
|
|
|
|
key: test_roc_auc
|
|
value: [0.80645161 0.82258065 0.85483871 0.80645161 0.83870968 0.82258065
|
|
0.77419355 0.82258065 0.80430108 0.80268817]
|
|
|
|
mean value: 0.8155376344086022
|
|
|
|
key: train_roc_auc
|
|
value: [0.8381295 0.83093525 0.83273381 0.8471223 0.84172662 0.82733813
|
|
0.84352518 0.84352518 0.83838091 0.8420425 ]
|
|
|
|
mean value: 0.8385459374435935
|
|
|
|
key: test_jcc
|
|
value: [0.68421053 0.7027027 0.72727273 0.66666667 0.71428571 0.69444444
|
|
0.66666667 0.71052632 0.68421053 0.68421053]
|
|
|
|
mean value: 0.6935196816775764
|
|
|
|
key: train_jcc
|
|
value: [0.72136223 0.71341463 0.71559633 0.73765432 0.72839506 0.70996979
|
|
0.73312883 0.72981366 0.72727273 0.73088685]
|
|
|
|
mean value: 0.7247494441137159
|
|
|
|
MCC on Blind test: 0.19
|
|
|
|
Accuracy on Blind test: 0.52
|
|
|
|
Model_name: XGBoost
|
|
Model func: XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000...
|
|
interaction_constraints=None, learning_rate=None,
|
|
max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan,
|
|
monotone_constraints=None, n_estimators=100,
|
|
n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None,
|
|
reg_lambda=None, scale_pos_weight=None,
|
|
subsample=None, tree_method=None,
|
|
use_label_encoder=False,
|
|
validate_parameters=None, verbosity=0))])
|
|
|
|
key: fit_time
|
|
value: [0.09478092 0.07708526 0.08946919 0.23117948 0.07251072 0.07828522
|
|
0.08111978 0.0676651 0.07024503 0.07174253]
|
|
|
|
mean value: 0.09340832233428956
|
|
|
|
key: score_time
|
|
value: [0.01199675 0.01191998 0.01227951 0.0113728 0.01115179 0.01123452
|
|
0.01101041 0.01072311 0.01149058 0.01141882]
|
|
|
|
mean value: 0.011459827423095703
|
|
|
|
key: test_mcc
|
|
value: [0.96824584 0.93743687 0.96824584 1. 1. 1.
|
|
0.87278605 0.93548387 0.90215054 0.96770777]
|
|
|
|
mean value: 0.9552056769139011
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.98387097 0.96774194 0.98387097 1. 1. 1.
|
|
0.93548387 0.96774194 0.95081967 0.98360656]
|
|
|
|
mean value: 0.9773135906927551
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.98360656 0.96666667 0.98360656 1. 1. 1.
|
|
0.9375 0.96774194 0.95081967 0.98412698]
|
|
|
|
mean value: 0.9774068373162768
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 1. 1. 1. 1. 1.
|
|
0.90909091 0.96774194 0.93548387 0.96875 ]
|
|
|
|
mean value: 0.9781066715542522
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.96774194 0.93548387 0.96774194 1. 1. 1.
|
|
0.96774194 0.96774194 0.96666667 1. ]
|
|
|
|
mean value: 0.9773118279569892
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.98387097 0.96774194 0.98387097 1. 1. 1.
|
|
0.93548387 0.96774194 0.95107527 0.98333333]
|
|
|
|
mean value: 0.9773118279569892
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.96774194 0.93548387 0.96774194 1. 1. 1.
|
|
0.88235294 0.9375 0.90625 0.96875 ]
|
|
|
|
mean value: 0.9565820683111954
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.08
|
|
|
|
Accuracy on Blind test: 0.2
|
|
|
|
Model_name: LDA
|
|
Model func: LinearDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LinearDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.04589272 0.06550336 0.05837011 0.08017612 0.0648365 0.09038734
|
|
0.06400347 0.06171775 0.09057832 0.05633402]
|
|
|
|
mean value: 0.06777997016906738
|
|
|
|
key: score_time
|
|
value: [0.01948833 0.01549053 0.01913047 0.01229525 0.01808548 0.01914907
|
|
0.01216698 0.01918221 0.01920176 0.01234913]
|
|
|
|
mean value: 0.016653919219970705
|
|
|
|
key: test_mcc
|
|
value: [0.93743687 0.87096774 0.87096774 0.80813523 0.78446454 0.87278605
|
|
0.93743687 0.84266484 0.80516731 0.8688172 ]
|
|
|
|
mean value: 0.8598844392978637
|
|
|
|
key: train_mcc
|
|
value: [0.97124816 0.96768225 0.96073627 0.96058703 0.96405373 0.95353974
|
|
0.94986154 0.94634322 0.94643646 0.95362457]
|
|
|
|
mean value: 0.9574112963993591
|
|
|
|
key: test_accuracy
|
|
value: [0.96774194 0.93548387 0.93548387 0.90322581 0.88709677 0.93548387
|
|
0.96774194 0.91935484 0.90163934 0.93442623]
|
|
|
|
mean value: 0.9287678476996298
|
|
|
|
key: train_accuracy
|
|
value: [0.98561151 0.98381295 0.98021583 0.98021583 0.98201439 0.97661871
|
|
0.97482014 0.97302158 0.97307002 0.97666068]
|
|
|
|
mean value: 0.9786061635431331
|
|
|
|
key: test_fscore
|
|
value: [0.96666667 0.93548387 0.93548387 0.9 0.87719298 0.93333333
|
|
0.96875 0.91525424 0.90322581 0.93548387]
|
|
|
|
mean value: 0.9270874639099115
|
|
|
|
key: train_fscore
|
|
value: [0.98566308 0.98389982 0.98046181 0.98039216 0.98207885 0.97690941
|
|
0.97508897 0.97335702 0.97345133 0.97690941]
|
|
|
|
mean value: 0.9788211864278304
|
|
|
|
key: test_precision
|
|
value: [1. 0.93548387 0.93548387 0.93103448 0.96153846 0.96551724
|
|
0.93939394 0.96428571 0.875 0.93548387]
|
|
|
|
mean value: 0.9443221452259272
|
|
|
|
key: train_precision
|
|
value: [0.98214286 0.97864769 0.96842105 0.97173145 0.97857143 0.96491228
|
|
0.96478873 0.96140351 0.96153846 0.96491228]
|
|
|
|
mean value: 0.9697069738050123
|
|
|
|
key: test_recall
|
|
value: [0.93548387 0.93548387 0.93548387 0.87096774 0.80645161 0.90322581
|
|
1. 0.87096774 0.93333333 0.93548387]
|
|
|
|
mean value: 0.9126881720430107
|
|
|
|
key: train_recall
|
|
value: [0.98920863 0.98920863 0.99280576 0.98920863 0.98561151 0.98920863
|
|
0.98561151 0.98561151 0.98566308 0.98920863]
|
|
|
|
mean value: 0.9881346535674685
|
|
|
|
key: test_roc_auc
|
|
value: [0.96774194 0.93548387 0.93548387 0.90322581 0.88709677 0.93548387
|
|
0.96774194 0.91935484 0.90215054 0.9344086 ]
|
|
|
|
mean value: 0.9288172043010753
|
|
|
|
key: train_roc_auc
|
|
value: [0.98561151 0.98381295 0.98021583 0.98021583 0.98201439 0.97661871
|
|
0.97482014 0.97302158 0.97304737 0.97668317]
|
|
|
|
mean value: 0.9786061473401924
|
|
|
|
key: test_jcc
|
|
value: [0.93548387 0.87878788 0.87878788 0.81818182 0.78125 0.875
|
|
0.93939394 0.84375 0.82352941 0.87878788]
|
|
|
|
mean value: 0.8652952676671841
|
|
|
|
key: train_jcc
|
|
value: [0.97173145 0.96830986 0.96167247 0.96153846 0.96478873 0.95486111
|
|
0.95138889 0.94809689 0.94827586 0.95486111]
|
|
|
|
mean value: 0.9585524834711829
|
|
|
|
MCC on Blind test: 0.13
|
|
|
|
Accuracy on Blind test: 0.37
|
|
|
|
Model_name: Multinomial
|
|
Model func: MultinomialNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MultinomialNB())])
|
|
|
|
key: fit_time
|
|
value: [0.02431893 0.01048541 0.01121306 0.01110697 0.01107454 0.01016903
|
|
0.00993896 0.01057076 0.01003838 0.01006365]
|
|
|
|
mean value: 0.011897969245910644
|
|
|
|
key: score_time
|
|
value: [0.00927353 0.00901461 0.00943899 0.00953412 0.00949192 0.00871015
|
|
0.00875497 0.00883937 0.00874853 0.00869203]
|
|
|
|
mean value: 0.009049820899963378
|
|
|
|
key: test_mcc
|
|
value: [0.61807005 0.67883359 0.83914639 0.54953196 0.61290323 0.67883359
|
|
0.52981294 0.7130241 0.54459739 0.54459739]
|
|
|
|
mean value: 0.6309350624207806
|
|
|
|
key: train_mcc
|
|
value: [0.64855706 0.64509217 0.63130015 0.66326227 0.66702732 0.6419512
|
|
0.65158942 0.63788443 0.65388715 0.6707996 ]
|
|
|
|
mean value: 0.6511350775809711
|
|
|
|
key: test_accuracy
|
|
value: [0.80645161 0.83870968 0.91935484 0.77419355 0.80645161 0.83870968
|
|
0.75806452 0.85483871 0.7704918 0.7704918 ]
|
|
|
|
mean value: 0.8137757800105764
|
|
|
|
key: train_accuracy
|
|
value: [0.82374101 0.82194245 0.8147482 0.83093525 0.83273381 0.82014388
|
|
0.82553957 0.81834532 0.82585278 0.83482944]
|
|
|
|
mean value: 0.8248811722614727
|
|
|
|
key: test_fscore
|
|
value: [0.81818182 0.84375 0.92063492 0.76666667 0.80645161 0.84375
|
|
0.7826087 0.86153846 0.75 0.78787879]
|
|
|
|
mean value: 0.8181460963456055
|
|
|
|
key: train_fscore
|
|
value: [0.82867133 0.82722513 0.82149047 0.83623693 0.83826087 0.82638889
|
|
0.82892416 0.82373473 0.83304647 0.83916084]
|
|
|
|
mean value: 0.830313982226392
|
|
|
|
key: test_precision
|
|
value: [0.77142857 0.81818182 0.90625 0.79310345 0.80645161 0.81818182
|
|
0.71052632 0.82352941 0.80769231 0.74285714]
|
|
|
|
mean value: 0.7998202447074926
|
|
|
|
key: train_precision
|
|
value: [0.80612245 0.80338983 0.79264214 0.81081081 0.81144781 0.79865772
|
|
0.81314879 0.8 0.8013245 0.81632653]
|
|
|
|
mean value: 0.805387058318656
|
|
|
|
key: test_recall
|
|
value: [0.87096774 0.87096774 0.93548387 0.74193548 0.80645161 0.87096774
|
|
0.87096774 0.90322581 0.7 0.83870968]
|
|
|
|
mean value: 0.8409677419354838
|
|
|
|
key: train_recall
|
|
value: [0.85251799 0.85251799 0.85251799 0.86330935 0.86690647 0.85611511
|
|
0.84532374 0.84892086 0.86738351 0.86330935]
|
|
|
|
mean value: 0.8568822361465667
|
|
|
|
key: test_roc_auc
|
|
value: [0.80645161 0.83870968 0.91935484 0.77419355 0.80645161 0.83870968
|
|
0.75806452 0.85483871 0.76935484 0.76935484]
|
|
|
|
mean value: 0.8135483870967741
|
|
|
|
key: train_roc_auc
|
|
value: [0.82374101 0.82194245 0.8147482 0.83093525 0.83273381 0.82014388
|
|
0.82553957 0.81834532 0.82577809 0.83488048]
|
|
|
|
mean value: 0.8248788066321137
|
|
|
|
key: test_jcc
|
|
value: [0.69230769 0.72972973 0.85294118 0.62162162 0.67567568 0.72972973
|
|
0.64285714 0.75675676 0.6 0.65 ]
|
|
|
|
mean value: 0.6951619525148937
|
|
|
|
key: train_jcc
|
|
value: [0.70746269 0.70535714 0.69705882 0.71856287 0.72155689 0.70414201
|
|
0.70783133 0.70029674 0.71386431 0.72289157]
|
|
|
|
mean value: 0.7099024359523051
|
|
|
|
MCC on Blind test: 0.19
|
|
|
|
Accuracy on Blind test: 0.55
|
|
|
|
Model_name: Passive Aggresive
|
|
Model func: PassiveAggressiveClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
PassiveAggressiveClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01834035 0.0359242 0.02628255 0.0305891 0.02540517 0.03625464
|
|
0.03056884 0.02949548 0.03100276 0.03467536]
|
|
|
|
mean value: 0.02985384464263916
|
|
|
|
key: score_time
|
|
value: [0.00996852 0.01121902 0.01181126 0.01182604 0.01179433 0.01183081
|
|
0.01177526 0.01184487 0.01179695 0.01184797]
|
|
|
|
mean value: 0.011571502685546875
|
|
|
|
key: test_mcc
|
|
value: [0.51507875 0.87278605 0.90748521 1. 0.87831007 0.90748521
|
|
0.81325006 0.87278605 0.77454559 0.93635873]
|
|
|
|
mean value: 0.8478085723184035
|
|
|
|
key: train_mcc
|
|
value: [0.59972626 0.97487691 0.94986154 0.97487691 0.90161686 0.9393413
|
|
0.97482645 0.97487691 0.97127459 0.97492232]
|
|
|
|
mean value: 0.9236200060442982
|
|
|
|
key: test_accuracy
|
|
value: [0.70967742 0.93548387 0.9516129 1. 0.93548387 0.9516129
|
|
0.90322581 0.93548387 0.8852459 0.96721311]
|
|
|
|
mean value: 0.9175039661554732
|
|
|
|
key: train_accuracy
|
|
value: [0.76618705 0.98741007 0.97482014 0.98741007 0.94964029 0.96942446
|
|
0.98741007 0.98741007 0.98563734 0.98743268]
|
|
|
|
mean value: 0.9582782248169148
|
|
|
|
key: test_fscore
|
|
value: [0.59090909 0.9375 0.94915254 1. 0.93103448 0.94915254
|
|
0.90909091 0.93333333 0.88888889 0.96875 ]
|
|
|
|
mean value: 0.9057811789726605
|
|
|
|
key: train_fscore
|
|
value: [0.69626168 0.98747764 0.97454545 0.98747764 0.94776119 0.96892139
|
|
0.98743268 0.98747764 0.98566308 0.98747764]
|
|
|
|
mean value: 0.9510496032258882
|
|
|
|
key: test_precision
|
|
value: [1. 0.90909091 1. 1. 1. 1.
|
|
0.85714286 0.96551724 0.84848485 0.93939394]
|
|
|
|
mean value: 0.9519629795491864
|
|
|
|
key: train_precision
|
|
value: [0.99333333 0.98220641 0.98529412 0.98220641 0.98449612 0.98513011
|
|
0.98566308 0.98220641 0.98566308 0.98220641]
|
|
|
|
mean value: 0.9848405474185916
|
|
|
|
key: test_recall
|
|
value: [0.41935484 0.96774194 0.90322581 1. 0.87096774 0.90322581
|
|
0.96774194 0.90322581 0.93333333 1. ]
|
|
|
|
mean value: 0.8868817204301075
|
|
|
|
key: train_recall
|
|
value: [0.53597122 0.99280576 0.96402878 0.99280576 0.91366906 0.95323741
|
|
0.98920863 0.99280576 0.98566308 0.99280576]
|
|
|
|
mean value: 0.9313001211933679
|
|
|
|
key: test_roc_auc
|
|
value: [0.70967742 0.93548387 0.9516129 1. 0.93548387 0.9516129
|
|
0.90322581 0.93548387 0.88602151 0.96666667]
|
|
|
|
mean value: 0.9175268817204302
|
|
|
|
key: train_roc_auc
|
|
value: [0.76618705 0.98741007 0.97482014 0.98741007 0.94964029 0.96942446
|
|
0.98741007 0.98741007 0.9856373 0.9874423 ]
|
|
|
|
mean value: 0.9582791831051288
|
|
|
|
key: test_jcc
|
|
value: [0.41935484 0.88235294 0.90322581 1. 0.87096774 0.90322581
|
|
0.83333333 0.875 0.8 0.93939394]
|
|
|
|
mean value: 0.842685440745213
|
|
|
|
key: train_jcc
|
|
value: [0.53405018 0.97526502 0.95035461 0.97526502 0.90070922 0.93971631
|
|
0.9751773 0.97526502 0.97173145 0.97526502]
|
|
|
|
mean value: 0.917279914545461
|
|
|
|
MCC on Blind test: 0.15
|
|
|
|
Accuracy on Blind test: 0.51
|
|
|
|
Model_name: Stochastic GDescent
|
|
Model func: SGDClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SGDClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02572179 0.02486515 0.01888251 0.0222733 0.02065015 0.02569151
|
|
0.01913404 0.02014589 0.02283072 0.01933765]
|
|
|
|
mean value: 0.021953272819519042
|
|
|
|
key: score_time
|
|
value: [0.0119679 0.01191592 0.01183343 0.01205277 0.01181006 0.01178956
|
|
0.01178837 0.0118444 0.01184034 0.01183796]
|
|
|
|
mean value: 0.01186807155609131
|
|
|
|
key: test_mcc
|
|
value: [0.81325006 0.83914639 0.74193548 0.90369611 0.87831007 1.
|
|
0.81325006 0.81325006 0.80516731 0.78156791]
|
|
|
|
mean value: 0.8389573460852846
|
|
|
|
key: train_mcc
|
|
value: [0.96425338 0.93301383 0.91404761 0.96425338 0.87765675 0.91941603
|
|
0.91755711 0.87385975 0.90947207 0.83401471]
|
|
|
|
mean value: 0.910754464045474
|
|
|
|
key: test_accuracy
|
|
value: [0.90322581 0.91935484 0.87096774 0.9516129 0.93548387 1.
|
|
0.90322581 0.90322581 0.90163934 0.8852459 ]
|
|
|
|
mean value: 0.9173982020095187
|
|
|
|
key: train_accuracy
|
|
value: [0.98201439 0.96582734 0.95683453 0.98201439 0.93705036 0.95863309
|
|
0.95863309 0.93345324 0.95332136 0.91202873]
|
|
|
|
mean value: 0.9539810521421284
|
|
|
|
key: test_fscore
|
|
value: [0.90909091 0.91803279 0.87096774 0.95238095 0.93103448 1.
|
|
0.90909091 0.90909091 0.90322581 0.87719298]
|
|
|
|
mean value: 0.9180107480140782
|
|
|
|
key: train_fscore
|
|
value: [0.98220641 0.96487985 0.95744681 0.98220641 0.93408663 0.96
|
|
0.95914742 0.93739425 0.95517241 0.90448343]
|
|
|
|
mean value: 0.9537023617168902
|
|
|
|
key: test_precision
|
|
value: [0.85714286 0.93333333 0.87096774 0.9375 1. 1.
|
|
0.85714286 0.85714286 0.875 0.96153846]
|
|
|
|
mean value: 0.914976810823585
|
|
|
|
key: train_precision
|
|
value: [0.97183099 0.99239544 0.94405594 0.97183099 0.98023715 0.92929293
|
|
0.94736842 0.88498403 0.92026578 0.98723404]
|
|
|
|
mean value: 0.952949570648824
|
|
|
|
key: test_recall
|
|
value: [0.96774194 0.90322581 0.87096774 0.96774194 0.87096774 1.
|
|
0.96774194 0.96774194 0.93333333 0.80645161]
|
|
|
|
mean value: 0.9255913978494623
|
|
|
|
key: train_recall
|
|
value: [0.99280576 0.93884892 0.97122302 0.99280576 0.89208633 0.99280576
|
|
0.97122302 0.99640288 0.99283154 0.83453237]
|
|
|
|
mean value: 0.9575565354168278
|
|
|
|
key: test_roc_auc
|
|
value: [0.90322581 0.91935484 0.87096774 0.9516129 0.93548387 1.
|
|
0.90322581 0.90322581 0.90215054 0.88655914]
|
|
|
|
mean value: 0.9175806451612903
|
|
|
|
key: train_roc_auc
|
|
value: [0.98201439 0.96582734 0.95683453 0.98201439 0.93705036 0.95863309
|
|
0.95863309 0.93345324 0.9532503 0.91188984]
|
|
|
|
mean value: 0.9539600577602434
|
|
|
|
key: test_jcc
|
|
value: [0.83333333 0.84848485 0.77142857 0.90909091 0.87096774 1.
|
|
0.83333333 0.83333333 0.82352941 0.78125 ]
|
|
|
|
mean value: 0.8504751482704519
|
|
|
|
key: train_jcc
|
|
value: [0.96503497 0.93214286 0.91836735 0.96503497 0.87632509 0.92307692
|
|
0.92150171 0.88216561 0.91419142 0.82562278]
|
|
|
|
mean value: 0.9123463652090518
|
|
|
|
MCC on Blind test: 0.18
|
|
|
|
Accuracy on Blind test: 0.88
|
|
|
|
Model_name: AdaBoost Classifier
|
|
Model func: AdaBoostClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', AdaBoostClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.2011168 0.18491268 0.18607616 0.19021749 0.18520522 0.18583274
|
|
0.19318509 0.18674707 0.18786812 0.18651438]
|
|
|
|
mean value: 0.18876757621765136
|
|
|
|
key: score_time
|
|
value: [0.01528382 0.01524591 0.01567864 0.01531458 0.01553869 0.01541805
|
|
0.0158565 0.01555061 0.01530504 0.0157001 ]
|
|
|
|
mean value: 0.01548919677734375
|
|
|
|
key: test_mcc
|
|
value: [0.96824584 0.96824584 0.96824584 0.90748521 0.96824584 1.
|
|
0.87278605 0.93548387 0.90215054 0.96770777]
|
|
|
|
mean value: 0.9458596788654657
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.98387097 0.98387097 0.98387097 0.9516129 0.98387097 1.
|
|
0.93548387 0.96774194 0.95081967 0.98360656]
|
|
|
|
mean value: 0.9724748810153359
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.98360656 0.98412698 0.98360656 0.94915254 0.98360656 1.
|
|
0.9375 0.96774194 0.95081967 0.98412698]
|
|
|
|
mean value: 0.9724287790373015
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 0.96875 1. 1. 1. 1.
|
|
0.90909091 0.96774194 0.93548387 0.96875 ]
|
|
|
|
mean value: 0.9749816715542522
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.96774194 1. 0.96774194 0.90322581 0.96774194 1.
|
|
0.96774194 0.96774194 0.96666667 1. ]
|
|
|
|
mean value: 0.9708602150537634
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.98387097 0.98387097 0.98387097 0.9516129 0.98387097 1.
|
|
0.93548387 0.96774194 0.95107527 0.98333333]
|
|
|
|
mean value: 0.97247311827957
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.96774194 0.96875 0.96774194 0.90322581 0.96774194 1.
|
|
0.88235294 0.9375 0.90625 0.96875 ]
|
|
|
|
mean value: 0.9470054554079697
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.09
|
|
|
|
Accuracy on Blind test: 0.2
|
|
|
|
Model_name: Bagging Classifier
|
|
Model func: BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
BaggingClassifier(n_jobs=10, oob_score=True,
|
|
random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.07024312 0.06641197 0.09449553 0.06638598 0.07757902 0.08163714
|
|
0.08580494 0.07853961 0.09642267 0.08542395]
|
|
|
|
mean value: 0.08029439449310302
|
|
|
|
key: score_time
|
|
value: [0.03562093 0.03045201 0.03463125 0.02480125 0.04036546 0.03743124
|
|
0.03029346 0.03446913 0.04009056 0.03119063]
|
|
|
|
mean value: 0.033934593200683594
|
|
|
|
key: test_mcc
|
|
value: [0.96824584 0.93743687 0.96824584 0.93743687 0.96824584 1.
|
|
0.96824584 0.84266484 0.90215054 0.96770777]
|
|
|
|
mean value: 0.9460380230276315
|
|
|
|
key: train_mcc
|
|
value: [0.99640932 0.99640932 0.98926624 0.99283145 0.99640932 0.99640932
|
|
0.98563702 1. 0.99641572 0.99641572]
|
|
|
|
mean value: 0.9946203448868983
|
|
|
|
key: test_accuracy
|
|
value: [0.98387097 0.96774194 0.98387097 0.96774194 0.98387097 1.
|
|
0.98387097 0.91935484 0.95081967 0.98360656]
|
|
|
|
mean value: 0.9724748810153359
|
|
|
|
key: train_accuracy
|
|
value: [0.99820144 0.99820144 0.99460432 0.99640288 0.99820144 0.99820144
|
|
0.99280576 1. 0.99820467 0.99820467]
|
|
|
|
mean value: 0.9973028040763081
|
|
|
|
key: test_fscore
|
|
value: [0.98360656 0.96666667 0.98360656 0.96666667 0.98360656 1.
|
|
0.98412698 0.91525424 0.95081967 0.98412698]
|
|
|
|
mean value: 0.9718480883137732
|
|
|
|
key: train_fscore
|
|
value: [0.9981982 0.9981982 0.99457505 0.99638989 0.99820467 0.9981982
|
|
0.99277978 1. 0.99821109 0.9981982 ]
|
|
|
|
mean value: 0.9972953272188904
|
|
|
|
key: test_precision
|
|
value: [1. 1. 1. 1. 1. 1.
|
|
0.96875 0.96428571 0.93548387 0.96875 ]
|
|
|
|
mean value: 0.9837269585253456
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 0.99641577 1.
|
|
0.99637681 1. 0.99642857 1. ]
|
|
|
|
mean value: 0.9989221153632093
|
|
|
|
key: test_recall
|
|
value: [0.96774194 0.93548387 0.96774194 0.93548387 0.96774194 1.
|
|
1. 0.87096774 0.96666667 1. ]
|
|
|
|
mean value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
0.9611827956989247
|
|
|
|
key: train_recall
|
|
value: [0.99640288 0.99640288 0.98920863 0.99280576 1. 0.99640288
|
|
0.98920863 1. 1. 0.99640288]
|
|
|
|
mean value: 0.99568345323741
|
|
|
|
key: test_roc_auc
|
|
value: [0.98387097 0.96774194 0.98387097 0.96774194 0.98387097 1.
|
|
0.98387097 0.91935484 0.95107527 0.98333333]
|
|
|
|
mean value: 0.97247311827957
|
|
|
|
key: train_roc_auc
|
|
value: [0.99820144 0.99820144 0.99460432 0.99640288 0.99820144 0.99820144
|
|
0.99280576 1. 0.99820144 0.99820144]
|
|
|
|
mean value: 0.9973021582733813
|
|
|
|
key: test_jcc
|
|
value: [0.96774194 0.93548387 0.96774194 0.93548387 0.96774194 1.
|
|
0.96875 0.84375 0.90625 0.96875 ]
|
|
|
|
mean value: 0.9461693548387097
|
|
|
|
key: train_jcc
|
|
value: [0.99640288 0.99640288 0.98920863 0.99280576 0.99641577 0.99640288
|
|
0.98566308 1. 0.99642857 0.99640288]
|
|
|
|
mean value: 0.9946133323755741
|
|
|
|
MCC on Blind test: 0.01
|
|
|
|
Accuracy on Blind test: 0.2
|
|
|
|
Model_name: Gaussian Process
|
|
Model func: GaussianProcessClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianProcessClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.19087005 0.18680048 0.12759018 0.22758508 0.21411705 0.20253134
|
|
0.20308423 0.20282555 0.17823601 0.17444253]
|
|
|
|
mean value: 0.19080824851989747
|
|
|
|
key: score_time
|
|
value: [0.0271349 0.01631045 0.01627851 0.03460932 0.03289127 0.02667069
|
|
0.02678728 0.02722788 0.02839899 0.03015685]
|
|
|
|
mean value: 0.02664661407470703
|
|
|
|
key: test_mcc
|
|
value: [0.77459667 0.7130241 0.74819006 0.74819006 0.83914639 0.7130241
|
|
0.61290323 0.74348441 0.83984455 0.77096774]
|
|
|
|
mean value: 0.7503371290040649
|
|
|
|
key: train_mcc
|
|
value: [0.96043787 0.97122302 0.97124816 0.97122302 0.96402878 0.96402878
|
|
0.97487691 0.97124816 0.97127459 0.95693712]
|
|
|
|
mean value: 0.9676526401636363
|
|
|
|
key: test_accuracy
|
|
value: [0.88709677 0.85483871 0.87096774 0.87096774 0.91935484 0.85483871
|
|
0.80645161 0.87096774 0.91803279 0.8852459 ]
|
|
|
|
mean value: 0.8738762559492332
|
|
|
|
key: train_accuracy
|
|
value: [0.98021583 0.98561151 0.98561151 0.98561151 0.98201439 0.98201439
|
|
0.98741007 0.98561151 0.98563734 0.97845601]
|
|
|
|
mean value: 0.9838194076695556
|
|
|
|
key: test_fscore
|
|
value: [0.8852459 0.86153846 0.86206897 0.86206897 0.91803279 0.84745763
|
|
0.80645161 0.86666667 0.9122807 0.8852459 ]
|
|
|
|
mean value: 0.8707057591179801
|
|
|
|
key: train_fscore
|
|
value: [0.98025135 0.98561151 0.98555957 0.98561151 0.98201439 0.98201439
|
|
0.98734177 0.98555957 0.98566308 0.97849462]
|
|
|
|
mean value: 0.9838121756879349
|
|
|
|
key: test_precision
|
|
value: [0.9 0.82352941 0.92592593 0.92592593 0.93333333 0.89285714
|
|
0.80645161 0.89655172 0.96296296 0.9 ]
|
|
|
|
mean value: 0.8967538039811154
|
|
|
|
key: train_precision
|
|
value: [0.97849462 0.98561151 0.98913043 0.98561151 0.98201439 0.98201439
|
|
0.99272727 0.98913043 0.98566308 0.975 ]
|
|
|
|
mean value: 0.9845397646946831
|
|
|
|
key: test_recall
|
|
value: [0.87096774 0.90322581 0.80645161 0.80645161 0.90322581 0.80645161
|
|
0.80645161 0.83870968 0.86666667 0.87096774]
|
|
|
|
mean value: 0.8479569892473118
|
|
|
|
key: train_recall
|
|
value: [0.98201439 0.98561151 0.98201439 0.98561151 0.98201439 0.98201439
|
|
0.98201439 0.98201439 0.98566308 0.98201439]
|
|
|
|
mean value: 0.983098682344447
|
|
|
|
key: test_roc_auc
|
|
value: [0.88709677 0.85483871 0.87096774 0.87096774 0.91935484 0.85483871
|
|
0.80645161 0.87096774 0.9172043 0.88548387]
|
|
|
|
mean value: 0.8738172043010752
|
|
|
|
key: train_roc_auc
|
|
value: [0.98021583 0.98561151 0.98561151 0.98561151 0.98201439 0.98201439
|
|
0.98741007 0.98561151 0.9856373 0.97846239]
|
|
|
|
mean value: 0.9838200407416002
|
|
|
|
key: test_jcc
|
|
value: [0.79411765 0.75675676 0.75757576 0.75757576 0.84848485 0.73529412
|
|
0.67567568 0.76470588 0.83870968 0.79411765]
|
|
|
|
mean value: 0.7723013767605797
|
|
|
|
key: train_jcc
|
|
value: [0.96126761 0.97163121 0.97153025 0.97163121 0.96466431 0.96466431
|
|
0.975 0.97153025 0.97173145 0.95789474]
|
|
|
|
mean value: 0.9681545322715445
|
|
|
|
MCC on Blind test: 0.19
|
|
|
|
Accuracy on Blind test: 0.49
|
|
|
|
Model_name: Gradient Boosting
|
|
Model func: GradientBoostingClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GradientBoostingClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.79381752 0.74333286 0.76294351 0.77773452 0.77165318 0.73637152
|
|
0.75150609 0.73600364 0.74480796 0.75842977]
|
|
|
|
mean value: 0.7576600551605225
|
|
|
|
key: score_time
|
|
value: [0.01070094 0.00964355 0.01037812 0.01054597 0.00949955 0.00938725
|
|
0.00942111 0.00961471 0.00962543 0.0094974 ]
|
|
|
|
mean value: 0.009831404685974121
|
|
|
|
key: test_mcc
|
|
value: [0.96824584 0.93743687 0.93548387 1. 0.96824584 1.
|
|
0.90748521 0.93548387 0.87082935 0.96770777]
|
|
|
|
mean value: 0.9490918620004005
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.98387097 0.96774194 0.96774194 1. 0.98387097 1.
|
|
0.9516129 0.96774194 0.93442623 0.98360656]
|
|
|
|
mean value: 0.9740613432046537
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.98360656 0.96666667 0.96774194 1. 0.98360656 1.
|
|
0.95384615 0.96774194 0.93548387 0.98412698]
|
|
|
|
mean value: 0.9742820661329387
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 1. 0.96774194 1. 1. 1.
|
|
0.91176471 0.96774194 0.90625 0.96875 ]
|
|
|
|
mean value: 0.9722248576850094
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.96774194 0.93548387 0.96774194 1. 0.96774194 1.
|
|
1. 0.96774194 0.96666667 1. ]
|
|
|
|
mean value: 0.9773118279569892
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.98387097 0.96774194 0.96774194 1. 0.98387097 1.
|
|
0.9516129 0.96774194 0.93494624 0.98333333]
|
|
|
|
mean value: 0.9740860215053764
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.96774194 0.93548387 0.9375 1. 0.96774194 1.
|
|
0.91176471 0.9375 0.87878788 0.96875 ]
|
|
|
|
mean value: 0.9505270326605716
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.08
|
|
|
|
Accuracy on Blind test: 0.2
|
|
|
|
Model_name: QDA
|
|
Model func: QuadraticDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', QuadraticDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.03455734 0.04919267 0.04343057 0.04365683 0.03343225 0.03359771
|
|
0.03364921 0.03464127 0.03343654 0.03420115]
|
|
|
|
mean value: 0.03737955093383789
|
|
|
|
key: score_time
|
|
value: [0.01318121 0.01318598 0.02024555 0.01931953 0.01600266 0.03063107
|
|
0.01645756 0.01628017 0.0201664 0.01660943]
|
|
|
|
mean value: 0.018207955360412597
|
|
|
|
key: test_mcc
|
|
value: [0.50083542 0.42289003 0.54953196 0.7130241 0.80645161 0.42289003
|
|
0.48488114 0.64549722 0.50975101 0.55307979]
|
|
|
|
mean value: 0.5608832308024235
|
|
|
|
key: train_mcc
|
|
value: [0.70615316 0.78802998 0.84192273 0.84576707 0.86411476 0.83287425
|
|
0.82387639 0.82642623 0.81259544 0.6893826 ]
|
|
|
|
mean value: 0.8031142630806966
|
|
|
|
key: test_accuracy
|
|
value: [0.74193548 0.70967742 0.77419355 0.85483871 0.90322581 0.70967742
|
|
0.74193548 0.82258065 0.75409836 0.7704918 ]
|
|
|
|
mean value: 0.7782654680063459
|
|
|
|
key: train_accuracy
|
|
value: [0.83273381 0.88309353 0.92086331 0.92266187 0.93165468 0.91366906
|
|
0.90827338 0.90647482 0.89766607 0.82226212]
|
|
|
|
mean value: 0.8939352647146197
|
|
|
|
key: test_fscore
|
|
value: [0.7037037 0.68965517 0.78125 0.84745763 0.90322581 0.68965517
|
|
0.75 0.81967213 0.73684211 0.75 ]
|
|
|
|
mean value: 0.7671461718512246
|
|
|
|
key: train_fscore
|
|
value: [0.79913607 0.86761711 0.92 0.9213894 0.93309859 0.90839695
|
|
0.9017341 0.8972332 0.88622754 0.7833698 ]
|
|
|
|
mean value: 0.8818202765481856
|
|
|
|
key: test_precision
|
|
value: [0.82608696 0.74074074 0.75757576 0.89285714 0.90322581 0.74074074
|
|
0.72727273 0.83333333 0.77777778 0.84 ]
|
|
|
|
mean value: 0.8039610983271572
|
|
|
|
key: train_precision
|
|
value: [1. 1. 0.93014706 0.93680297 0.9137931 0.96747967
|
|
0.97095436 0.99561404 1. 1. ]
|
|
|
|
mean value: 0.9714791202980441
|
|
|
|
key: test_recall
|
|
value: [0.61290323 0.64516129 0.80645161 0.80645161 0.90322581 0.64516129
|
|
0.77419355 0.80645161 0.7 0.67741935]
|
|
|
|
mean value: 0.7377419354838709
|
|
|
|
key: train_recall
|
|
value: [0.66546763 0.76618705 0.91007194 0.90647482 0.95323741 0.85611511
|
|
0.84172662 0.81654676 0.79569892 0.64388489]
|
|
|
|
mean value: 0.815541115494701
|
|
|
|
key: test_roc_auc
|
|
value: [0.74193548 0.70967742 0.77419355 0.85483871 0.90322581 0.70967742
|
|
0.74193548 0.82258065 0.75322581 0.77204301]
|
|
|
|
mean value: 0.7783333333333333
|
|
|
|
key: train_roc_auc
|
|
value: [0.83273381 0.88309353 0.92086331 0.92266187 0.93165468 0.91366906
|
|
0.90827338 0.90647482 0.89784946 0.82194245]
|
|
|
|
mean value: 0.8939216368840411
|
|
|
|
key: test_jcc
|
|
value: [0.54285714 0.52631579 0.64102564 0.73529412 0.82352941 0.52631579
|
|
0.6 0.69444444 0.58333333 0.6 ]
|
|
|
|
mean value: 0.6273115670019694
|
|
|
|
key: train_jcc
|
|
value: [0.66546763 0.76618705 0.85185185 0.85423729 0.87458746 0.83216783
|
|
0.82105263 0.81362007 0.79569892 0.64388489]
|
|
|
|
mean value: 0.7918755627241194
|
|
|
|
MCC on Blind test: 0.03
|
|
|
|
Accuracy on Blind test: 0.51
|
|
|
|
Model_name: Ridge Classifier
|
|
Model func: RidgeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02617693 0.03780103 0.03633928 0.03645992 0.0377748 0.03427029
|
|
0.03479242 0.03106213 0.01793814 0.0177362 ]
|
|
|
|
mean value: 0.03103511333465576
|
|
|
|
key: score_time
|
|
value: [0.02564049 0.0231626 0.02571344 0.02222896 0.0219202 0.02516079
|
|
0.0265336 0.01335716 0.02909446 0.01307034]
|
|
|
|
mean value: 0.022588205337524415
|
|
|
|
key: test_mcc
|
|
value: [0.90369611 0.90369611 0.83914639 0.90369611 0.90369611 0.93548387
|
|
0.82199494 0.83914639 0.67858574 0.83984455]
|
|
|
|
mean value: 0.8568986328829187
|
|
|
|
key: train_mcc
|
|
value: [0.93987712 0.94305636 0.93214329 0.94986154 0.95705746 0.94634322
|
|
0.94634322 0.9393413 0.94643646 0.94277021]
|
|
|
|
mean value: 0.9443230181420963
|
|
|
|
key: test_accuracy
|
|
value: [0.9516129 0.9516129 0.91935484 0.9516129 0.9516129 0.96774194
|
|
0.90322581 0.91935484 0.83606557 0.91803279]
|
|
|
|
mean value: 0.9270227392913802
|
|
|
|
key: train_accuracy
|
|
value: [0.96942446 0.97122302 0.96582734 0.97482014 0.97841727 0.97302158
|
|
0.97302158 0.96942446 0.97307002 0.97127469]
|
|
|
|
mean value: 0.9719524559885305
|
|
|
|
key: test_fscore
|
|
value: [0.95238095 0.95238095 0.91803279 0.95238095 0.95238095 0.96774194
|
|
0.91176471 0.91803279 0.84375 0.92307692]
|
|
|
|
mean value: 0.9291922947737448
|
|
|
|
key: train_fscore/home/tanu/git/LSHTM_analysis/scripts/ml/./katg_orig.py:195: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
rouC_CT.sort_values(by = ['test_mcc'], ascending = False, inplace = True)
|
|
/home/tanu/git/LSHTM_analysis/scripts/ml/./katg_orig.py:198: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
rouC_BT.sort_values(by = ['bts_mcc'], ascending = False, inplace = True)
|
|
|
|
value: [0.97012302 0.97173145 0.96637168 0.97508897 0.97864769 0.97335702
|
|
0.97335702 0.9699115 0.97345133 0.97153025]
|
|
|
|
mean value: 0.9723569920770859
|
|
|
|
key: test_precision
|
|
value: [0.9375 0.9375 0.93333333 0.9375 0.9375 0.96774194
|
|
0.83783784 0.93333333 0.79411765 0.88235294]
|
|
|
|
mean value: 0.909871702822367
|
|
|
|
key: train_precision
|
|
value: [0.94845361 0.95486111 0.95121951 0.96478873 0.96830986 0.96140351
|
|
0.96140351 0.95470383 0.96153846 0.96126761]
|
|
|
|
mean value: 0.9587949740571688
|
|
|
|
key: test_recall
|
|
value: [0.96774194 0.96774194 0.90322581 0.96774194 0.96774194 0.96774194
|
|
1. 0.90322581 0.9 0.96774194]
|
|
|
|
mean value: 0.9512903225806452
|
|
|
|
key: train_recall
|
|
value: [0.99280576 0.98920863 0.98201439 0.98561151 0.98920863 0.98561151
|
|
0.98561151 0.98561151 0.98566308 0.98201439]
|
|
|
|
mean value: 0.9863360924163894
|
|
|
|
key: test_roc_auc
|
|
value: [0.9516129 0.9516129 0.91935484 0.9516129 0.9516129 0.96774194
|
|
0.90322581 0.91935484 0.83709677 0.9172043 ]
|
|
|
|
mean value: 0.9270430107526882
|
|
|
|
key: train_roc_auc
|
|
value: [0.96942446 0.97122302 0.96582734 0.97482014 0.97841727 0.97302158
|
|
0.97302158 0.96942446 0.97304737 0.97129393]
|
|
|
|
mean value: 0.9719521157267734
|
|
|
|
key: test_jcc
|
|
value: [0.90909091 0.90909091 0.84848485 0.90909091 0.90909091 0.9375
|
|
0.83783784 0.84848485 0.72972973 0.85714286]
|
|
|
|
mean value: 0.8695543758043758
|
|
|
|
key: train_jcc
|
|
value: [0.94197952 0.94501718 0.93493151 0.95138889 0.95818815 0.94809689
|
|
0.94809689 0.94158076 0.94827586 0.94463668]
|
|
|
|
mean value: 0.9462192321272894
|
|
|
|
MCC on Blind test: 0.14
|
|
|
|
Accuracy on Blind test: 0.45
|
|
|
|
Model_name: Ridge ClassifierCV
|
|
Model func: RidgeClassifierCV(cv=10)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifierCV(cv=10))])
|
|
|
|
key: fit_time
|
|
value: [0.19797468 0.3235662 0.41162133 0.29132533 0.34859681 0.40734649
|
|
0.45757914 0.48200107 0.28318882 0.37048697]
|
|
|
|
mean value: 0.3573686838150024
|
|
|
|
key: score_time
|
|
value: [0.02385783 0.01895332 0.01230931 0.01896501 0.02679181 0.02773142
|
|
0.02564216 0.02602696 0.0194571 0.01240873]
|
|
|
|
mean value: 0.02121436595916748
|
|
|
|
key: test_mcc
|
|
value: [0.90369611 0.90369611 0.83914639 0.90369611 0.87278605 0.93548387
|
|
0.82199494 0.83914639 0.71525965 0.83984455]
|
|
|
|
mean value: 0.8574750173276433
|
|
|
|
key: train_mcc
|
|
value: [0.93987712 0.94305636 0.93214329 0.94986154 0.95693359 0.94634322
|
|
0.94634322 0.9393413 0.94643646 0.94277021]
|
|
|
|
mean value: 0.9443106311332057
|
|
|
|
key: test_accuracy
|
|
value: [0.9516129 0.9516129 0.91935484 0.9516129 0.93548387 0.96774194
|
|
0.90322581 0.91935484 0.85245902 0.91803279]
|
|
|
|
mean value: 0.9270491803278689
|
|
|
|
key: train_accuracy
|
|
value: [0.96942446 0.97122302 0.96582734 0.97482014 0.97841727 0.97302158
|
|
0.97302158 0.96942446 0.97307002 0.97127469]
|
|
|
|
mean value: 0.9719524559885305
|
|
|
|
key: test_fscore
|
|
value: [0.95238095 0.95238095 0.91803279 0.95238095 0.93333333 0.96774194
|
|
0.91176471 0.91803279 0.86153846 0.92307692]
|
|
|
|
mean value: 0.9290663790228291
|
|
|
|
key: train_fscore
|
|
value: [0.97012302 0.97173145 0.96637168 0.97508897 0.97857143 0.97335702
|
|
0.97335702 0.9699115 0.97345133 0.97153025]
|
|
|
|
mean value: 0.9723493662509547
|
|
|
|
key: test_precision
|
|
value: [0.9375 0.9375 0.93333333 0.9375 0.96551724 0.96774194
|
|
0.83783784 0.93333333 0.8 0.88235294]
|
|
|
|
mean value: 0.9132616622544156
|
|
|
|
key: train_precision
|
|
value: [0.94845361 0.95486111 0.95121951 0.96478873 0.97163121 0.96140351
|
|
0.96140351 0.95470383 0.96153846 0.96126761]
|
|
|
|
mean value: 0.9591271087090518
|
|
|
|
key: test_recall
|
|
value: [0.96774194 0.96774194 0.90322581 0.96774194 0.90322581 0.96774194
|
|
1. 0.90322581 0.93333333 0.96774194]
|
|
|
|
mean value: 0.9481720430107528
|
|
|
|
key: train_recall
|
|
value: [0.99280576 0.98920863 0.98201439 0.98561151 0.98561151 0.98561151
|
|
0.98561151 0.98561151 0.98566308 0.98201439]
|
|
|
|
mean value: 0.9859763801861736
|
|
|
|
key: test_roc_auc
|
|
value: [0.9516129 0.9516129 0.91935484 0.9516129 0.93548387 0.96774194
|
|
0.90322581 0.91935484 0.85376344 0.9172043 ]
|
|
|
|
mean value: 0.9270967741935484
|
|
|
|
key: train_roc_auc
|
|
value: [0.96942446 0.97122302 0.96582734 0.97482014 0.97841727 0.97302158
|
|
0.97302158 0.96942446 0.97304737 0.97129393]
|
|
|
|
mean value: 0.9719521157267734
|
|
|
|
key: test_jcc
|
|
value: [0.90909091 0.90909091 0.84848485 0.90909091 0.875 0.9375
|
|
0.83783784 0.84848485 0.75675676 0.85714286]
|
|
|
|
mean value: 0.8688479875979875
|
|
|
|
key: train_jcc
|
|
value: [0.94197952 0.94501718 0.93493151 0.95138889 0.95804196 0.94809689
|
|
0.94809689 0.94158076 0.94827586 0.94463668]
|
|
|
|
mean value: 0.9462046126004747
|
|
|
|
MCC on Blind test: 0.14
|
|
|
|
Accuracy on Blind test: 0.45
|